<?xml version="1.0"?>
<?xml-stylesheet type="text/css" href="http://index.cslt.org/mediawiki/skins/common/feed.css?303"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="zh-cn">
		<id>http://index.cslt.org/mediawiki/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=Guojiayu</id>
		<title>cslt Wiki - User contributions [zh-cn]</title>
		<link rel="self" type="application/atom+xml" href="http://index.cslt.org/mediawiki/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=Guojiayu"/>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E7%89%B9%E6%AE%8A:%E7%94%A8%E6%88%B7%E8%B4%A1%E7%8C%AE/Guojiayu"/>
		<updated>2026-04-14T02:11:41Z</updated>
		<subtitle>User contributions</subtitle>
		<generator>MediaWiki 1.23.3</generator>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Cslt-member-visitors</id>
		<title>Cslt-member-visitors</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Cslt-member-visitors"/>
				<updated>2017-09-14T02:46:35Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* Jiayu Guo (郭佳雨) */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;br /&gt;
==Professionals==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Engineers==&lt;br /&gt;
&lt;br /&gt;
=== Yuxin Zhang (张雨心) ===&lt;br /&gt;
[[文件:Zyx.jpg|200px]]&lt;br /&gt;
* Haixia research center&lt;br /&gt;
* 2016.10 -&lt;br /&gt;
* Finance processing&lt;br /&gt;
* [[媒体文件:Agreement zyx.jpg|Data Security Agreement]]&lt;br /&gt;
&lt;br /&gt;
==Students==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
===Jiyuan Zhang (张记袁)===&lt;br /&gt;
[[文件:Zhangjiyuan.png|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2016.4-&lt;br /&gt;
* neural generation model&lt;br /&gt;
* [[媒体文件:An overview of machine translation.pptx|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Zhangjy_data.jpg|Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
===Ying Shi (石颖)===&lt;br /&gt;
[[文件:Ying_shi.jpg|200px]]&lt;br /&gt;
* BJTU&lt;br /&gt;
* 2016.6.15-&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Shiying_bi_weekly_report.ppt|Bi-weekly report]]&lt;br /&gt;
*[[媒体文件:DataSecurityAgreement_YingShi.jpg|DataSecurityAgreement]]&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
===Yixiang Chen (陈怿翔)===&lt;br /&gt;
[[文件:Chenyx.jpg|200px]]&lt;br /&gt;
*China University of Mining and Technology&lt;br /&gt;
* 2016.7-&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Chenyx_report.pdf |Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Chenyx data.jpg|Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
===Shiyue Zhang (张诗悦)===&lt;br /&gt;
[[文件:Zhang Shiyue.jpg|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2016.9.06-&lt;br /&gt;
* Language processing&lt;br /&gt;
* [[媒体文件:1.pic hd.jpg| Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
=== Yang Wei (魏扬) ===&lt;br /&gt;
[[文件:Weiy_photo.jpg|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2016.10 -&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Bi-monthly report weiy.pdf|Bi-monthly report]]&lt;br /&gt;
* [[媒体文件:Data agreement weiy.jpg|Data security agreement]]&lt;br /&gt;
&lt;br /&gt;
===Yanqing Wang (王延清)===&lt;br /&gt;
[[文件:wyq photo.jpeg|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2016.11-2017.2&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Bi-weekly_report.pptx|Bi-weekly report]]&lt;br /&gt;
*[[媒体文件:DataSecurityAgreement wangyanqing.jpg|Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
=== Yaodong Wang (王耀东) ===&lt;br /&gt;
[[文件:wangyd.jpg|200px]]&lt;br /&gt;
* CUFE&lt;br /&gt;
* 2016.12.22 -&lt;br /&gt;
* [[媒体文件:Bi_weekly_report.pptx |Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Data_security_Agreement.jpg|Data security agreement]]&lt;br /&gt;
* Financial processing&lt;br /&gt;
&lt;br /&gt;
=== Tongzheng Ren (任桐正) ===&lt;br /&gt;
[[文件:IcCardPicture.do2.jpeg|200px]]&lt;br /&gt;
* THU&lt;br /&gt;
* 2016.12.22 -&lt;br /&gt;
* [[媒体文件:利用LSTM预测时间序列.pptx|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Data Security Agreement-Tongzheng Ren.jpg|Data security agreement]]&lt;br /&gt;
* Financial processing&lt;br /&gt;
&lt;br /&gt;
=== Shipan Ren (任师攀) ===&lt;br /&gt;
[[文件:Rsp.jpg|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2017.05.10 -&lt;br /&gt;
* [[媒体文件:seq2seq.pptx|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Agreement.jpg|Data security agreement]]&lt;br /&gt;
* Language processing&lt;br /&gt;
&lt;br /&gt;
=== Miao Zhang (张淼) ===&lt;br /&gt;
[[文件:miao.JPG|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2017.5.1 -&lt;br /&gt;
* [[媒体文件:Zm cough.pdf |Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Zm.JPG|Data security agreement]]&lt;br /&gt;
* Speech processing&lt;br /&gt;
&lt;br /&gt;
=== Xuejing Zhang (张学敬) ===&lt;br /&gt;
[[文件:Zhangxuejing.jpg|200px]]&lt;br /&gt;
* BISTU&lt;br /&gt;
* 2017.7.7 -&lt;br /&gt;
* [[媒体文件:Zhangxj.jpg|Data security agreement]]&lt;br /&gt;
* Language processing&lt;br /&gt;
&lt;br /&gt;
=== Xiaofei Kang (康晓非) ===&lt;br /&gt;
[[文件:头像.jpg|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2017.7.17 -&lt;br /&gt;
* [[媒体文件:xxxx.jpg|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Kangxf_Data.jpg|Data security agreement]]&lt;br /&gt;
* Speech processing&lt;br /&gt;
&lt;br /&gt;
=== Jiayu Guo (郭佳雨) ===&lt;br /&gt;
[[文件:1.jpg|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2017.7.18 -&lt;br /&gt;
* [[媒体文件:security.jpg|Data security agreement]]&lt;br /&gt;
* Natural Language Processing&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Security.jpg</id>
		<title>文件:Security.jpg</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Security.jpg"/>
				<updated>2017-09-14T02:46:02Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：Guojiayu上传“文件:Security.jpg”的新版本&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Security.jpg</id>
		<title>文件:Security.jpg</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Security.jpg"/>
				<updated>2017-09-14T02:45:58Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：Guojiayu上传“文件:Security.jpg”的新版本&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:1.jpg</id>
		<title>文件:1.jpg</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:1.jpg"/>
				<updated>2017-09-14T02:34:19Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：Guojiayu上传“文件:1.jpg”的新版本&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:1.jpg</id>
		<title>文件:1.jpg</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:1.jpg"/>
				<updated>2017-09-14T02:34:17Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：Guojiayu上传“文件:1.jpg”的新版本&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Cslt-member-visitors</id>
		<title>Cslt-member-visitors</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Cslt-member-visitors"/>
				<updated>2017-09-14T02:33:32Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* Jiayu Guo (郭佳雨) */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;br /&gt;
==Professionals==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Engineers==&lt;br /&gt;
&lt;br /&gt;
=== Yuxin Zhang (张雨心) ===&lt;br /&gt;
[[文件:Zyx.jpg|200px]]&lt;br /&gt;
* Haixia research center&lt;br /&gt;
* 2016.10 -&lt;br /&gt;
* Finance processing&lt;br /&gt;
* [[媒体文件:Agreement zyx.jpg|Data Security Agreement]]&lt;br /&gt;
&lt;br /&gt;
==Students==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
===Jiyuan Zhang (张记袁)===&lt;br /&gt;
[[文件:Zhangjiyuan.png|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2016.4-&lt;br /&gt;
* neural generation model&lt;br /&gt;
* [[媒体文件:An overview of machine translation.pptx|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Zhangjy_data.jpg|Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
===Ying Shi (石颖)===&lt;br /&gt;
[[文件:Ying_shi.jpg|200px]]&lt;br /&gt;
* BJTU&lt;br /&gt;
* 2016.6.15-&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Shiying_bi_weekly_report.ppt|Bi-weekly report]]&lt;br /&gt;
*[[媒体文件:DataSecurityAgreement_YingShi.jpg|DataSecurityAgreement]]&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
===Yixiang Chen (陈怿翔)===&lt;br /&gt;
[[文件:Chenyx.jpg|200px]]&lt;br /&gt;
*China University of Mining and Technology&lt;br /&gt;
* 2016.7-&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Chenyx_report.pdf |Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Chenyx data.jpg|Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
===Shiyue Zhang (张诗悦)===&lt;br /&gt;
[[文件:Zhang Shiyue.jpg|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2016.9.06-&lt;br /&gt;
* Language processing&lt;br /&gt;
* [[媒体文件:1.pic hd.jpg| Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
=== Yang Wei (魏扬) ===&lt;br /&gt;
[[文件:Weiy_photo.jpg|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2016.10 -&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Bi-monthly report weiy.pdf|Bi-monthly report]]&lt;br /&gt;
* [[媒体文件:Data agreement weiy.jpg|Data security agreement]]&lt;br /&gt;
&lt;br /&gt;
===Yanqing Wang (王延清)===&lt;br /&gt;
[[文件:wyq photo.jpeg|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2016.11-2017.2&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Bi-weekly_report.pptx|Bi-weekly report]]&lt;br /&gt;
*[[媒体文件:DataSecurityAgreement wangyanqing.jpg|Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
=== Yaodong Wang (王耀东) ===&lt;br /&gt;
[[文件:wangyd.jpg|200px]]&lt;br /&gt;
* CUFE&lt;br /&gt;
* 2016.12.22 -&lt;br /&gt;
* [[媒体文件:Bi_weekly_report.pptx |Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Data_security_Agreement.jpg|Data security agreement]]&lt;br /&gt;
* Financial processing&lt;br /&gt;
&lt;br /&gt;
=== Tongzheng Ren (任桐正) ===&lt;br /&gt;
[[文件:IcCardPicture.do2.jpeg|200px]]&lt;br /&gt;
* THU&lt;br /&gt;
* 2016.12.22 -&lt;br /&gt;
* [[媒体文件:利用LSTM预测时间序列.pptx|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Data Security Agreement-Tongzheng Ren.jpg|Data security agreement]]&lt;br /&gt;
* Financial processing&lt;br /&gt;
&lt;br /&gt;
=== Shipan Ren (任师攀) ===&lt;br /&gt;
[[文件:Rsp.jpg|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2017.05.10 -&lt;br /&gt;
* [[媒体文件:seq2seq.pptx|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Agreement.jpg|Data security agreement]]&lt;br /&gt;
* Language processing&lt;br /&gt;
&lt;br /&gt;
=== Miao Zhang (张淼) ===&lt;br /&gt;
[[文件:miao.JPG|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2017.5.1 -&lt;br /&gt;
* [[媒体文件:Zm cough.pdf |Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Zm.JPG|Data security agreement]]&lt;br /&gt;
* Speech processing&lt;br /&gt;
&lt;br /&gt;
=== Xuejing Zhang (张学敬) ===&lt;br /&gt;
[[文件:Zhangxuejing.jpg|200px]]&lt;br /&gt;
* BISTU&lt;br /&gt;
* 2017.7.7 -&lt;br /&gt;
* [[媒体文件:Zhangxj.jpg|Data security agreement]]&lt;br /&gt;
* Language processing&lt;br /&gt;
&lt;br /&gt;
=== Xiaofei Kang (康晓非) ===&lt;br /&gt;
[[文件:头像.jpg|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2017.7.17 -&lt;br /&gt;
* [[媒体文件:xxxx.jpg|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Kangxf_Data.jpg|Data security agreement]]&lt;br /&gt;
* Speech processing&lt;br /&gt;
&lt;br /&gt;
=== Jiayu Guo (郭佳雨) ===&lt;br /&gt;
[[文件:xxxx.jpg|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2017.7.18 -&lt;br /&gt;
* [[媒体文件:1.jpg|Data security agreement]]&lt;br /&gt;
* Natural Language Processing&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Cslt-member-visitors</id>
		<title>Cslt-member-visitors</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Cslt-member-visitors"/>
				<updated>2017-09-14T02:27:20Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* Students */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;br /&gt;
==Professionals==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
==Engineers==&lt;br /&gt;
&lt;br /&gt;
=== Yuxin Zhang (张雨心) ===&lt;br /&gt;
[[文件:Zyx.jpg|200px]]&lt;br /&gt;
* Haixia research center&lt;br /&gt;
* 2016.10 -&lt;br /&gt;
* Finance processing&lt;br /&gt;
* [[媒体文件:Agreement zyx.jpg|Data Security Agreement]]&lt;br /&gt;
&lt;br /&gt;
==Students==&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
===Jiyuan Zhang (张记袁)===&lt;br /&gt;
[[文件:Zhangjiyuan.png|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2016.4-&lt;br /&gt;
* neural generation model&lt;br /&gt;
* [[媒体文件:An overview of machine translation.pptx|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Zhangjy_data.jpg|Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
===Ying Shi (石颖)===&lt;br /&gt;
[[文件:Ying_shi.jpg|200px]]&lt;br /&gt;
* BJTU&lt;br /&gt;
* 2016.6.15-&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Shiying_bi_weekly_report.ppt|Bi-weekly report]]&lt;br /&gt;
*[[媒体文件:DataSecurityAgreement_YingShi.jpg|DataSecurityAgreement]]&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
===Yixiang Chen (陈怿翔)===&lt;br /&gt;
[[文件:Chenyx.jpg|200px]]&lt;br /&gt;
*China University of Mining and Technology&lt;br /&gt;
* 2016.7-&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Chenyx_report.pdf |Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Chenyx data.jpg|Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
===Shiyue Zhang (张诗悦)===&lt;br /&gt;
[[文件:Zhang Shiyue.jpg|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2016.9.06-&lt;br /&gt;
* Language processing&lt;br /&gt;
* [[媒体文件:1.pic hd.jpg| Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
=== Yang Wei (魏扬) ===&lt;br /&gt;
[[文件:Weiy_photo.jpg|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2016.10 -&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Bi-monthly report weiy.pdf|Bi-monthly report]]&lt;br /&gt;
* [[媒体文件:Data agreement weiy.jpg|Data security agreement]]&lt;br /&gt;
&lt;br /&gt;
===Yanqing Wang (王延清)===&lt;br /&gt;
[[文件:wyq photo.jpeg|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2016.11-2017.2&lt;br /&gt;
* Speech processing&lt;br /&gt;
* [[媒体文件:Bi-weekly_report.pptx|Bi-weekly report]]&lt;br /&gt;
*[[媒体文件:DataSecurityAgreement wangyanqing.jpg|Data_security_agreement]]&lt;br /&gt;
&lt;br /&gt;
=== Yaodong Wang (王耀东) ===&lt;br /&gt;
[[文件:wangyd.jpg|200px]]&lt;br /&gt;
* CUFE&lt;br /&gt;
* 2016.12.22 -&lt;br /&gt;
* [[媒体文件:Bi_weekly_report.pptx |Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Data_security_Agreement.jpg|Data security agreement]]&lt;br /&gt;
* Financial processing&lt;br /&gt;
&lt;br /&gt;
=== Tongzheng Ren (任桐正) ===&lt;br /&gt;
[[文件:IcCardPicture.do2.jpeg|200px]]&lt;br /&gt;
* THU&lt;br /&gt;
* 2016.12.22 -&lt;br /&gt;
* [[媒体文件:利用LSTM预测时间序列.pptx|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Data Security Agreement-Tongzheng Ren.jpg|Data security agreement]]&lt;br /&gt;
* Financial processing&lt;br /&gt;
&lt;br /&gt;
=== Shipan Ren (任师攀) ===&lt;br /&gt;
[[文件:Rsp.jpg|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2017.05.10 -&lt;br /&gt;
* [[媒体文件:seq2seq.pptx|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Agreement.jpg|Data security agreement]]&lt;br /&gt;
* Language processing&lt;br /&gt;
&lt;br /&gt;
=== Miao Zhang (张淼) ===&lt;br /&gt;
[[文件:miao.JPG|200px]]&lt;br /&gt;
* BUPT&lt;br /&gt;
* 2017.5.1 -&lt;br /&gt;
* [[媒体文件:Zm cough.pdf |Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:Zm.JPG|Data security agreement]]&lt;br /&gt;
* Speech processing&lt;br /&gt;
&lt;br /&gt;
=== Xuejing Zhang (张学敬) ===&lt;br /&gt;
[[文件:Zhangxuejing.jpg|200px]]&lt;br /&gt;
* BISTU&lt;br /&gt;
* 2017.7.7 -&lt;br /&gt;
* [[媒体文件:Zhangxj.jpg|Data security agreement]]&lt;br /&gt;
* Language processing&lt;br /&gt;
&lt;br /&gt;
=== Xiaofei Kang (康晓非) ===&lt;br /&gt;
[[文件:头像.jpg|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2017.7.17 -&lt;br /&gt;
* [[媒体文件:xxxx.jpg|Bi-weekly report]]&lt;br /&gt;
* [[媒体文件:xxxx.jpg|Data security agreement]]&lt;br /&gt;
* Speech processing&lt;br /&gt;
&lt;br /&gt;
=== Jiayu Guo (郭佳雨) ===&lt;br /&gt;
[[文件:xxxx.jpg|200px]]&lt;br /&gt;
* PKU&lt;br /&gt;
* 2017.7.18 -&lt;br /&gt;
* [[媒体文件:xxxx.jpg|Data security agreement]]&lt;br /&gt;
* Speech processing&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-11</id>
		<title>NLP Status Report 2017-9-11</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-11"/>
				<updated>2017-09-11T07:36:12Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
||  &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* read predict.py, memoryModule_decoder.py and seq2seq.py to understand the general prediction process&lt;br /&gt;
||  &lt;br /&gt;
* start running the model to check whether any details remain uncertain.&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-11</id>
		<title>NLP Status Report 2017-9-11</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-11"/>
				<updated>2017-09-11T07:25:47Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
||  &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* read predict.py to understand the general prediction process&lt;br /&gt;
||  &lt;br /&gt;
* start running the model to check whether any details remain uncertain.&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-11</id>
		<title>NLP Status Report 2017-9-11</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-11"/>
				<updated>2017-09-11T07:22:23Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
||  &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* read predict.py to understand the general prediction process&lt;br /&gt;
||  &lt;br /&gt;
** read predict.py to understand the general prediction process&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-11</id>
		<title>NLP Status Report 2017-9-11</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-11"/>
				<updated>2017-09-11T07:21:59Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
||  &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* read predict.py to understand the general prediction process&lt;br /&gt;
||  &lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4</id>
		<title>NLP Status Report 2017-9-4</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4"/>
				<updated>2017-09-04T01:50:30Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document [http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* read the released documentation of other NMT toolkits&lt;br /&gt;
* cleaned up the code&lt;br /&gt;
* wrote the documents [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/5/5c/Manual_V1.0.docx] [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/3/38/Manual_V0.10.docx]&lt;br /&gt;
||  &lt;br /&gt;
* write the paper on our baseline system&lt;br /&gt;
* read the augmented NMT code&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* Cleaned up the code of predict.py&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* configure the environment for TF-0.12, or update the code to be compatible with TF-1.0 (see the sketch below)&lt;br /&gt;
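* a minimal sketch of branching on the TF version so one codebase runs under both (an assumed approach, not the project's actual code):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  TF_1 = tf.__version__.startswith('1.')&lt;br /&gt;
  # wrap calls whose signatures changed between 0.12 and 1.0&lt;br /&gt;
  def concat(values, axis):&lt;br /&gt;
      if TF_1:&lt;br /&gt;
          return tf.concat(values, axis=axis)  # 1.0 signature&lt;br /&gt;
      return tf.concat(axis, values)           # 0.12 signature: concat(concat_dim, values)&lt;br /&gt;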
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4</id>
		<title>NLP Status Report 2017-9-4</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4"/>
				<updated>2017-09-04T01:34:46Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document [http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* read the released documentation of other NMT toolkits&lt;br /&gt;
* cleaned up the code&lt;br /&gt;
* wrote the documents [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/5/5c/Manual_V1.0.docx] [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/3/38/Manual_V0.10.docx]&lt;br /&gt;
||  &lt;br /&gt;
* write the paper on our baseline system&lt;br /&gt;
* read the augmented NMT code&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* Cleaned up the code of predict.py&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-09-01T00:41:03Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang (张记袁)&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at University of Sydney &lt;br /&gt;
* '''Shiyao Li (李诗瑶)''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
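* the cascade above as a minimal sketch (translator_1, translator_2 and decode() are assumed names, not the project code):&lt;br /&gt;
  # 1st translator: Chinese -&amp;gt; English; the 2nd translator then re-translates its output,&lt;br /&gt;
  # trained on the same data, with randomly initialized embeddings&lt;br /&gt;
  draft = translator_1.decode(src_zh)   # machine-translated English&lt;br /&gt;
  final = translator_2.decode(draft)    # 2nd pass, intended to fix meaning shift&lt;br /&gt;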
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, of sizes 22000 and 22017 respectively&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates; to prevent overfitting, we use the model&lt;br /&gt;
                     at 3000 updates, whose BLEU is 34.96, to generate the 2nd translator's&lt;br /&gt;
                     training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or different&lt;br /&gt;
                     data won't influence the 2nd translator's performance; if anything, using the&lt;br /&gt;
                     same data may be better, at least judging from the results. But I have to take&lt;br /&gt;
                     into account the smaller training data size compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                      performance due to information loss as the information flows through the network&lt;br /&gt;
                      from end to end. The decoder's smaller vocabulary size compared to the encoder's&lt;br /&gt;
                      demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a mapping that solves meaning shift using the&lt;br /&gt;
                      2nd translator, but whether the mapping is learned or not is obscured by the&lt;br /&gt;
                      smaller vocab size phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned the LSTM model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM model and the GRU model&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size and will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the dev set but badly on the test set; I want to figure out why.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than the 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why the double-decoder without joint loss generalizes very badly is that the gap between&lt;br /&gt;
  the teacher forcing mechanism (training process) and the beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error toward the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
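* a minimal sketch of the joint loss above (TF 1.0 style; the tensor names are assumptions, not the project code):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  # final loss = 1st decoder's loss + 2nd decoder's loss, both against the same targets&lt;br /&gt;
  loss_1 = tf.contrib.seq2seq.sequence_loss(logits_dec1, targets, weights)&lt;br /&gt;
  loss_2 = tf.contrib.seq2seq.sequence_loss(logits_dec2, targets, weights)&lt;br /&gt;
  joint_loss = loss_1 + loss_2&lt;br /&gt;
  train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(joint_loss)&lt;br /&gt;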
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
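* a minimal sketch of the attention mixing above (attn_1 and attn_2 are the two context vectors; names assumed):&lt;br /&gt;
  # attn_1: context over the Chinese source; attn_2: context over the machine-translated English&lt;br /&gt;
  # w1 = w2 = 1.0 gives final_attn = attn_1 + attn_2; 4/3 and 2/3 give the reweighted variants&lt;br /&gt;
  final_attn = w1 * attn_1 + w2 * attn_2&lt;br /&gt;
  # final_attn then feeds the single decoder in place of a one-encoder context&lt;br /&gt;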
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, and training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of the big data set. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
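* a minimal sketch of the constant-embedding trick above (variable names are assumptions; the tf calls are standard TF 1.0):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  # pretrained_emb: the English embedding matrix taken from the 1st translator&lt;br /&gt;
  emb = tf.get_variable('enc_emb_en', initializer=pretrained_emb, trainable=False)  # constant&lt;br /&gt;
  # trainable=True instead gives the 'initialize then finetune' variant (the 46.06 above)&lt;br /&gt;
  enc_inputs = tf.nn.embedding_lookup(emb, token_ids)&lt;br /&gt;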
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing-style synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because the semantics are not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has lower complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: the test set and training set of the small data set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the tensorflow0.1 version leads to exhaustion of memory&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* (reason: the test set and the train set of the small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (Out Of Memory) error occurred when version 0.1 was trained on the large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper resource allocation by the tensorflow0.1 framework leads to exhaustion of memory &lt;br /&gt;
* I tried 4 times (just entering the same command), and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* processed documents &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the memory-augmented NMT code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single characters (see the segmentation sketch after the table)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/01&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested the checkpoints on the en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/01&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/02&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU scores) of other models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/02&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/03&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU scores) of other seq2seq models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/03&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/04&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learned Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/04&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/07&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/07&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/08&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained a statistical machine translation model and tested it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/08&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* studied TensorFlow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/09&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* wrote automation scripts to process data, train the model, and test it (see the Moses pipeline sketch after the table)&lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/09&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on the data in which the ancient-language text was split into single characters.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained statistical machine translation models and tested them &lt;br /&gt;
* datasets: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* processed the Songshu data&lt;br /&gt;
* read papers on CNNs &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collated experimental results&lt;br /&gt;
* compared our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* checked test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the THUMT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* searched for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the THUMT manual and learned how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model on data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether THUMT works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* checkpoint-100000 translation model&lt;br /&gt;
* BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. With data from Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. With data from Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. With data from Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. With data from Shiji and Zizhitongjian (430,000 pairs), with the ancient-language text split character by character (see the segmentation sketch after the table), we get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* wrote automation scripts to process data, train the model, and test it &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested translation models using a single reference and multiple references (see the multi-reference BLEU sketch after the table)&lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the published information about other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* checked experimental results&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the ViVi_NMT (version 1.0) paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the ViVi_NMT (version 1.0) paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the ViVi_NMT (version 1.0) paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Jiayu Guo || 11:30 || 21:00 || 9 || &lt;br /&gt;
* studied the VV model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the ViVi_NMT (version 1.0) paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Jiayu Guo || 10:00 || 20:00 || 10 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
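&lt;br /&gt;
For reference, the BLEU scores quoted throughout the table follow the standard corpus-level definition (this is the textbook formula, not anything specific to these experiments):&lt;br /&gt;
  \mathrm{BLEU} = \mathrm{BP} \cdot \exp\Big(\sum_{n=1}^{N} w_n \log p_n\Big), \qquad \mathrm{BP} = \min\big(1, e^{1 - r/c}\big)&lt;br /&gt;
where p_n is the modified n-gram precision, w_n = 1/N (N = 4 by default), c is the candidate length, and r is the effective reference length.&lt;br /&gt;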
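&lt;br /&gt;
The character-level segmentation used in the 2017/07/31, 2017/08/09, and 2017/08/16 entries (which lifted BLEU from about 9 to 11.11) amounts to emitting one token per character; a minimal sketch, in which the script name and I/O handling are illustrative assumptions rather than the script actually used:&lt;br /&gt;
  # segment_chars.py -- one token per character on the ancient-language side&lt;br /&gt;
  import sys&lt;br /&gt;
&lt;br /&gt;
  def split_chars(line):&lt;br /&gt;
      # drop existing whitespace, then emit every remaining character as its own token&lt;br /&gt;
      return ' '.join(ch for ch in line.strip() if not ch.isspace())&lt;br /&gt;
&lt;br /&gt;
  for line in sys.stdin:&lt;br /&gt;
      print(split_chars(line))&lt;br /&gt;
Run as, e.g., python segment_chars.py &amp;lt; train.ancient &amp;gt; train.ancient.char before training.&lt;br /&gt;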
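&lt;br /&gt;
The 2017/08/09 automation around Moses can be sketched as follows; the install path, language codes, and language-model settings here are assumptions based on standard Moses usage, not the actual scripts:&lt;br /&gt;
  # moses_pipeline.py -- hedged sketch of a train-then-decode Moses run&lt;br /&gt;
  import subprocess&lt;br /&gt;
&lt;br /&gt;
  MOSES = '/opt/mosesdecoder'  # assumed install location&lt;br /&gt;
&lt;br /&gt;
  def run(cmd, **kw):&lt;br /&gt;
      print('+ ' + ' '.join(cmd))&lt;br /&gt;
      subprocess.run(cmd, check=True, **kw)&lt;br /&gt;
&lt;br /&gt;
  # 1. train a phrase-based model on tokenized parallel data corpus/train.zh, corpus/train.en&lt;br /&gt;
  run([MOSES + '/scripts/training/train-model.perl',&lt;br /&gt;
       '-root-dir', 'work', '-corpus', 'corpus/train', '-f', 'zh', '-e', 'en',&lt;br /&gt;
       '-lm', '0:3:lm/train.en.blm:8'])&lt;br /&gt;
&lt;br /&gt;
  # 2. decode the test set with the trained moses.ini&lt;br /&gt;
  with open('test.zh') as src, open('test.out', 'w') as out:&lt;br /&gt;
      run([MOSES + '/bin/moses', '-f', 'work/model/moses.ini'], stdin=src, stdout=out)&lt;br /&gt;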
&lt;br /&gt;
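The single- versus multiple-reference testing in the 2017/08/18 entry can be illustrated with NLTK's corpus_bleu; NLTK is an assumption made for illustration (the experiments may well have used a different scorer such as multi-bleu.perl):&lt;br /&gt;
  # bleu_refs.py -- corpus BLEU with one vs. two references per sentence&lt;br /&gt;
  from nltk.translate.bleu_score import corpus_bleu&lt;br /&gt;
&lt;br /&gt;
  def read_tok(path):&lt;br /&gt;
      return [line.split() for line in open(path, encoding='utf-8')]&lt;br /&gt;
&lt;br /&gt;
  hyps = read_tok('test.out')   # system outputs, tokenized&lt;br /&gt;
  ref0 = read_tok('ref0.txt')   # first reference per sentence&lt;br /&gt;
  ref1 = read_tok('ref1.txt')   # second reference per sentence&lt;br /&gt;
&lt;br /&gt;
  # corpus_bleu expects, for each hypothesis, a list of acceptable references&lt;br /&gt;
  print('single-reference BLEU:', corpus_bleu([[r] for r in ref0], hyps))&lt;br /&gt;
  print('multi-reference BLEU:', corpus_bleu([[a, b] for a, b in zip(ref0, ref1)], hyps))&lt;br /&gt;
&lt;br /&gt;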
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>


|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment (the cascade pipeline is sketched below): &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
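*A toy Python sketch of the cascade these experiments assume (not the project's tf_translate code): the 1st translator drafts English, and the 2nd translator trains on those drafts against the references. All names are hypothetical stand-ins.&lt;br /&gt;
 # cascade sketch; translate_1 stands in for the trained zh-en seq2seq model&lt;br /&gt;
 def translate_1(zh_sentence):&lt;br /&gt;
     return 'draft translation of ' + zh_sentence&lt;br /&gt;
 &lt;br /&gt;
 def build_second_stage_data(zh_corpus, en_refs):&lt;br /&gt;
     # 2nd translator trains on (machine-translated English, reference English) pairs&lt;br /&gt;
     drafts = [translate_1(zh) for zh in zh_corpus]&lt;br /&gt;
     return list(zip(drafts, en_refs))&lt;br /&gt;
 &lt;br /&gt;
 pairs = build_second_stage_data(['zh sent 1', 'zh sent 2'], ['en ref 1', 'en ref 2'])&lt;br /&gt;
 print(pairs[0])&lt;br /&gt;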
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, with 22000 and 22017 sentence pairs respectively&lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to prevent overfitting we use the&lt;br /&gt;
                     model at 3000 updates, whose BLEU is 34.96, to generate the 2nd translator's&lt;br /&gt;
                     training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or a different&lt;br /&gt;
                     set won't much influence the 2nd translator's performance; if anything, using the same&lt;br /&gt;
                     one may be better, at least from these results. But I have to take into account the&lt;br /&gt;
                     smaller size of the training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses a '''constant untrainable embedding''' imported from the 1st translator's decoder (a freezing sketch follows at the end of this entry)&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                      performance due to information loss as information flows through the network from&lt;br /&gt;
                      end to end. The decoder's smaller vocabulary size compared to the encoder's&lt;br /&gt;
                      demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a mapping that solves meaning shift using the 2nd&lt;br /&gt;
                      translator, but whether that mapping is learned is obscured by the smaller-vocabulary &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
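*A minimal TensorFlow 1.x sketch of the freezing idea above, assuming a hypothetical matrix emb_from_decoder1 exported from the 1st translator's decoder (sizes are illustrative, not the actual tf_translate code):&lt;br /&gt;
 import numpy as np&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
 &lt;br /&gt;
 vocab_size, emb_size = 6000, 310   # illustrative sizes&lt;br /&gt;
 # hypothetical matrix exported from the 1st translator's decoder&lt;br /&gt;
 emb_from_decoder1 = np.random.rand(vocab_size, emb_size).astype('float32')&lt;br /&gt;
 &lt;br /&gt;
 # trainable=False keeps the imported embedding constant during training&lt;br /&gt;
 embedding = tf.Variable(emb_from_decoder1, trainable=False, name='frozen_emb')&lt;br /&gt;
 token_ids = tf.placeholder(tf.int32, shape=[None, None])&lt;br /&gt;
 embedded = tf.nn.embedding_lookup(embedding, token_ids)&lt;br /&gt;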
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM and seq2seq models &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM and GRU models (see the sketch below)&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
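*A minimal TensorFlow 1.x sketch contrasting the two cells (tf.contrib.rnn is assumed for the 1.0-era cell API used in this project; sizes are illustrative):&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
 &lt;br /&gt;
 # LSTM keeps a separate cell state c and hidden state h; GRU merges them&lt;br /&gt;
 # into a single state with fewer gates and parameters&lt;br /&gt;
 lstm_cell = tf.contrib.rnn.BasicLSTMCell(500)&lt;br /&gt;
 gru_cell = tf.contrib.rnn.GRUCell(500)&lt;br /&gt;
 &lt;br /&gt;
 inputs = tf.placeholder(tf.float32, shape=[None, None, 310])&lt;br /&gt;
 _, lstm_state = tf.nn.dynamic_rnn(lstm_cell, inputs, dtype=tf.float32, scope='lstm')&lt;br /&gt;
 _, gru_state = tf.nn.dynamic_rnn(gru_cell, inputs, dtype=tf.float32, scope='gru')&lt;br /&gt;
 # lstm_state is an LSTMStateTuple (c, h); gru_state is a single tensor&lt;br /&gt;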
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size and will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the dev set but badly on the test data; I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, which means the final loss = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss (a minimal sketch follows at the end of this entry)&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why the double-decoder without joint loss generalizes very badly is that the gap between&lt;br /&gt;
  the teacher forcing mechanism (training process) and the beam search mechanism (decoding process)&lt;br /&gt;
  propagates and amplifies the error toward the output end, which breaks the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
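*A minimal TensorFlow 1.x sketch of the joint loss described above; the logits placeholders are hypothetical stand-ins for the two decoders' outputs:&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
 &lt;br /&gt;
 labels = tf.placeholder(tf.int32, shape=[None, None])&lt;br /&gt;
 logits_dec1 = tf.placeholder(tf.float32, shape=[None, None, 6000])  # hypothetical&lt;br /&gt;
 logits_dec2 = tf.placeholder(tf.float32, shape=[None, None, 6000])  # hypothetical&lt;br /&gt;
 &lt;br /&gt;
 loss_1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(&lt;br /&gt;
     labels=labels, logits=logits_dec1))&lt;br /&gt;
 loss_2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(&lt;br /&gt;
     labels=labels, logits=logits_dec2))&lt;br /&gt;
 # final loss = 1st decoder's loss + 2nd decoder's loss, so both receive gradients&lt;br /&gt;
 joint_loss = loss_1 + loss_2&lt;br /&gt;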
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2''' (a numpy sketch follows below)&lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
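*A small numpy sketch of the double-attention combination above; dot-product scoring is an assumption, the real model's scorer may differ:&lt;br /&gt;
 import numpy as np&lt;br /&gt;
 &lt;br /&gt;
 def softmax(x):&lt;br /&gt;
     e = np.exp(x - x.max())&lt;br /&gt;
     return e / e.sum()&lt;br /&gt;
 &lt;br /&gt;
 def context(dec_state, enc_states):&lt;br /&gt;
     scores = enc_states.dot(dec_state)       # dot-product scoring (assumption)&lt;br /&gt;
     return softmax(scores).dot(enc_states)   # weighted sum of encoder states&lt;br /&gt;
 &lt;br /&gt;
 s = np.random.rand(500)            # current decoder state&lt;br /&gt;
 enc_zh = np.random.rand(20, 500)   # Chinese encoder states&lt;br /&gt;
 enc_en = np.random.rand(18, 500)   # machine-translated-English encoder states&lt;br /&gt;
 &lt;br /&gt;
 attn_1 = context(s, enc_zh)&lt;br /&gt;
 attn_2 = context(s, enc_en)&lt;br /&gt;
 final_attn = (4.0 / 3) * attn_1 + (2.0 / 3) * attn_2   # reweighted at decode time&lt;br /&gt;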
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing-style synonym substitution, but it didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words (a lookup sketch follows below)&lt;br /&gt;
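*A toy numpy sketch of one way the LM's learned embeddings might be queried for similar words; the vocabulary and matrix here are hypothetical:&lt;br /&gt;
 import numpy as np&lt;br /&gt;
 &lt;br /&gt;
 # hypothetical vocabulary and embedding matrix taken from the trained LM&lt;br /&gt;
 vocab = ['good', 'great', 'bad', 'terrible']&lt;br /&gt;
 emb = np.random.rand(len(vocab), 64)&lt;br /&gt;
 &lt;br /&gt;
 def most_similar(word, topn=2):&lt;br /&gt;
     v = emb[vocab.index(word)]&lt;br /&gt;
     sims = emb.dot(v) / (np.linalg.norm(emb, axis=1) * np.linalg.norm(v))&lt;br /&gt;
     order = np.argsort(-sims)&lt;br /&gt;
     return [vocab[i] for i in order if vocab[i] != word][:topn]&lt;br /&gt;
 &lt;br /&gt;
 print(most_similar('good'))&lt;br /&gt;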
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because the semantics were not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by the encoder, and the decoder generates sequences from this vector (see the sketch below)&lt;br /&gt;
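*A minimal TensorFlow 1.x sketch of that encoder-decoder wiring, feeding the encoder's final state in as the decoder's initial state (hypothetical sizes; tf.contrib.rnn assumed for the 1.x cell API):&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
 &lt;br /&gt;
 enc_in = tf.placeholder(tf.float32, shape=[None, None, 310])&lt;br /&gt;
 dec_in = tf.placeholder(tf.float32, shape=[None, None, 310])&lt;br /&gt;
 &lt;br /&gt;
 with tf.variable_scope('encoder'):&lt;br /&gt;
     enc_cell = tf.contrib.rnn.GRUCell(500)&lt;br /&gt;
     _, enc_state = tf.nn.dynamic_rnn(enc_cell, enc_in, dtype=tf.float32)&lt;br /&gt;
 &lt;br /&gt;
 with tf.variable_scope('decoder'):&lt;br /&gt;
     dec_cell = tf.contrib.rnn.GRUCell(500)&lt;br /&gt;
     # the fixed-length enc_state carries the source semantics into the decoder&lt;br /&gt;
     dec_out, _ = tf.nn.dynamic_rnn(dec_cell, dec_in, initial_state=enc_state)&lt;br /&gt;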
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my PC and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found that version 1.0 of the code saves more training time and has less complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good even when the model is overfitting&lt;br /&gt;
* reason: on the small data set, the test set and training set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained using the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the tensorflow0.1 version leads to exhaustion of memory&lt;br /&gt;
* I tried many times, and version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation was not good&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
* found that the BLEU is still good even when the model is overfitting&lt;br /&gt;
* (reason: the test set and the train set of the small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (out-of-memory) error occurred when version 0.1 was trained using the large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper resource allocation by the tensorflow0.1 framework leads to exhaustion of memory &lt;br /&gt;
* I tried 4 times (just entering the same command), and version 0.1 eventually worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single characters (see the sketch below)&lt;br /&gt;
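*A tiny Python sketch of this character-level split (the sample line is from the 2017/08/16 examples below):&lt;br /&gt;
 def split_to_chars(line):&lt;br /&gt;
     # one whitespace-separated token per character&lt;br /&gt;
     return ' '.join(list(line.strip()))&lt;br /&gt;
 &lt;br /&gt;
 print(split_to_chars('神大用则竭，形大劳则敝'))&lt;br /&gt;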
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other seq2seq models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learned Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained a statistical machine translation model and tested it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* coded automation scripts to process data, train the model and test the model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on the data whose ancient-language side was split into single characters.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained statistical machine translation models and tested them &lt;br /&gt;
* datasets: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* searched for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model with data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether THUMT works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. using data from Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. using data from Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. using Shiji and Zizhitongjian (430,000 pairs), we get BLEU of about 9.&lt;br /&gt;
*4. using Shiji and Zizhitongjian (430,000 pairs), with the ancient-language text split character by character, we get BLEU 11.11 at most (a BLEU-computation sketch follows below).&lt;br /&gt;
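*For illustration, a small sketch of computing corpus BLEU over character-tokenized pairs, using NLTK here (the project's actual scoring script is not shown in this log):&lt;br /&gt;
 from nltk.translate.bleu_score import corpus_bleu&lt;br /&gt;
 &lt;br /&gt;
 # character-tokenized reference and hypothesis from the 2017/08/16 samples above&lt;br /&gt;
 references = [[list('精神过度使用就会衰竭')]]   # one list of references per hypothesis&lt;br /&gt;
 hypotheses = [list('精神过度就可衰竭')]&lt;br /&gt;
 print(corpus_bleu(references, hypotheses))&lt;br /&gt;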
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* coded automation scripts to process data, train the model and test the model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested translation models using single-reference and multiple-reference settings &lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information about other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-29T12:04:59Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 seperately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learne lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English&lt;br /&gt;
  Chinese and English use different encoders and different attentions (see the sketch after this entry)&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
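*A minimal sketch of the weighted two-source attention above (TensorFlow-1.x-style Python; attention, enc_states_zh, enc_states_en, dec_state, w1 and w2 are hypothetical names):&lt;br /&gt;
  # one context vector per source: the Chinese input and the machine-translated English&lt;br /&gt;
  attn_1 = attention(enc_states_zh, dec_state)&lt;br /&gt;
  attn_2 = attention(enc_states_en, dec_state)&lt;br /&gt;
  # weighted sum; (w1, w2) was (1, 1), (2/3, 4/3) or (4/3, 2/3) in the runs above&lt;br /&gt;
  final_attn = w1 * attn_1 + w2 * attn_2&lt;br /&gt;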
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the encoder's English word embedding constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable (see the sketch after this entry).&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Prepared the big data set. &lt;br /&gt;
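*A minimal sketch of freezing the encoder's English embedding (TensorFlow-1.x-style Python; pretrained_en_emb and en_token_ids are hypothetical names):&lt;br /&gt;
  # embedding imported from the 1st translator, kept constant via trainable=False&lt;br /&gt;
  en_emb = tf.get_variable('en_emb', initializer=pretrained_en_emb, trainable=False)&lt;br /&gt;
  enc_inputs = tf.nn.embedding_lookup(en_emb, en_token_ids)&lt;br /&gt;
  # every other embedding and parameter stays trainable and is finetuned as usual&lt;br /&gt;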
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and concat(attn_1, attn_2) (see the sketch below)&lt;br /&gt;
* The best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
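*A minimal sketch of the concat variant, in contrast to the add variant (TensorFlow-1.x-style Python; attn_size is a hypothetical name for the attention dimension):&lt;br /&gt;
  combined = tf.concat([attn_1, attn_2], axis=-1)  # keep both contexts side by side&lt;br /&gt;
  final_attn = tf.layers.dense(combined, attn_size)  # linear projection back to the attention size&lt;br /&gt;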
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing via synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words (a minimal sketch follows)&lt;br /&gt;
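*A minimal sketch of such a GRU language model (TensorFlow-1.x-style Python; vocab_size, emb_size, hidden_size and word_ids are hypothetical names):&lt;br /&gt;
  cell = tf.nn.rnn_cell.GRUCell(hidden_size)&lt;br /&gt;
  emb = tf.get_variable('emb', [vocab_size, emb_size])&lt;br /&gt;
  rnn_out, _ = tf.nn.dynamic_rnn(cell, tf.nn.embedding_lookup(emb, word_ids), dtype=tf.float32)&lt;br /&gt;
  logits = tf.layers.dense(rnn_out, vocab_size)  # next-word scores, used to rank candidate substitutes&lt;br /&gt;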
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because it does not capture semantics&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem (see the sketch after this entry)&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
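*A minimal sketch of that encoder-decoder structure (TensorFlow-1.x-style Python; enc_emb, dec_emb and hidden_size are hypothetical names for the embedded inputs and state size):&lt;br /&gt;
  # the encoder compresses the source into a fixed-length vector: its final state&lt;br /&gt;
  _, enc_state = tf.nn.dynamic_rnn(tf.nn.rnn_cell.GRUCell(hidden_size), enc_emb, dtype=tf.float32, scope='encoder')&lt;br /&gt;
  # the decoder generates the target sequence conditioned only on that vector&lt;br /&gt;
  dec_out, _ = tf.nn.dynamic_rnn(tf.nn.rnn_cell.GRUCell(hidden_size), dec_emb, initial_state=enc_state, scope='decoder')&lt;br /&gt;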
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq models, with and without attention, didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has lower complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: on the small data set, the test set and the training set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the TensorFlow 0.1 version exhausts memory&lt;br /&gt;
* I retried many times, and version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code is about 0.06 seconds per step faster than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* ran the code on the English-French data set and found the translations are not good&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* (reason: the test set and the train set of the small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (out-of-memory) error occurred when version 0.1 was trained on the large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper resource allocation by the TensorFlow 0.1 framework exhausts memory &lt;br /&gt;
* I tried 4 times (just entering the same command again), and version 0.1 eventually worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on attention mechanisms.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints on the en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* processed documents &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient-language text into single words&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints on the en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other seq2seq models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learned Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained a statistical machine translation model and tested it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read TensorFlow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* coded automation scripts to process data, train the model and test the model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on the data whose ancient-language side was split into single characters.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained statistical machine translation models and tested them &lt;br /&gt;
* dataset: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* processed the Songshu data&lt;br /&gt;
* read papers on CNNs &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collated experimental results&lt;br /&gt;
* compared our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read a paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* searched for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the THUMT manual and learned how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model with data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether THUMT works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. With data from Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. With data from Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. With data from Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. With data from Shiji and Zizhitongjian (430,000 pairs), splitting the ancient-language text character by character, we get BLEU 11.11 at most (a scoring sketch follows).&lt;br /&gt;
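*A minimal sketch of character-level BLEU scoring consistent with the numbers above (Python with NLTK rather than the project's own scorer; ref.txt and hyp.txt are hypothetical file names):&lt;br /&gt;
  from nltk.translate.bleu_score import corpus_bleu&lt;br /&gt;
  # one sentence per line, split character by character&lt;br /&gt;
  refs = [[list(line.strip())] for line in open('ref.txt')]&lt;br /&gt;
  hyps = [list(line.strip()) for line in open('hyp.txt')]&lt;br /&gt;
  print(corpus_bleu(refs, hyps) * 100)&lt;br /&gt;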
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* coded automation scripts to process data, train the model and test the model &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested translation models using a single reference and multiple references &lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read released information about other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jia Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jia Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jia Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jia Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jia Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-28T11:18:54Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney &lt;br /&gt;
* '''Shiyao Li (李诗瑶)'''   :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person !! Start !! Leave !! Hours !! Status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, of 22,000 and 22,017 sentences respectively&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to generate the 2nd translator's&lt;br /&gt;
                      training data we use the model at 3000 updates, whose BLEU is 34.96, in order to&lt;br /&gt;
                      prevent overfitting)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or a different&lt;br /&gt;
                     set won't influence the 2nd translator's performance; if anything, using the same one may&lt;br /&gt;
                      be better, at least judging from the results. But I also have to consider the smaller size &lt;br /&gt;
                      of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                       performance, due to information loss as the information flows through the network from &lt;br /&gt;
                       end to end. The decoder's smaller vocabulary size compared to the encoder's demonstrates&lt;br /&gt;
                       this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment was to look for a mapping that solves meaning shift using the 2nd&lt;br /&gt;
                       translator, but whether the mapping is learned or not is obscured by the smaller&lt;br /&gt;
                       vocab-size effect.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM model and the GRU model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size and will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the dev set but badly on the test data; I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very badly&lt;br /&gt;
*I'm now trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, i.e. the final loss = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason the double-decoder without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training process) and the beam-search mechanism (decoding process)&lt;br /&gt;
  propagates and amplifies errors toward the output end, which breaks the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the encoder's English word embedding constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Prepared the big data set. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and concat(attn_1, attn_2)&lt;br /&gt;
* The best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing via synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because it does not capture semantics&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq models, with and without attention, didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has lower complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: on the small data set, the test set and the training set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the TensorFlow 0.1 version exhausts memory&lt;br /&gt;
* I retried many times, and version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code is about 0.06 seconds per step faster than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* ran the code on the English-French data set and found the translations are not good&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* (reason: the test set and the train set of the small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (out-of-memory) error occurred when version 0.1 was trained on the large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper resource allocation by the TensorFlow 0.1 framework exhausts memory &lt;br /&gt;
* I tried 4 times (just entering the same command again), and version 0.1 eventually worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on attention mechanisms.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time&lt;br /&gt;
* found these two versions have similar perplexity and BLEU values&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented NMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented NMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented NMT code&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented NMT code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single characters&lt;br /&gt;
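* a minimal sketch of that splitting step, assuming one sentence per line in a plain-text file (the file names are hypothetical):&lt;br /&gt;
  # space-separate every character so each character becomes a token&lt;br /&gt;
  with open('ancient.txt', encoding='utf-8') as fin, \&lt;br /&gt;
          open('ancient.char.txt', 'w', encoding='utf-8') as fout:&lt;br /&gt;
      for line in fin:&lt;br /&gt;
          chars = [ch for ch in line.strip() if not ch.isspace()]&lt;br /&gt;
          fout.write(' '.join(chars) + '\n')&lt;br /&gt;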
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other seq2seq models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learned Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained a statistical machine translation model and tested it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* studied TensorFlow&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* coded automation scripts to process data, train the model, and test it&lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
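* the scripts themselves are not recorded here; a minimal sketch of this kind of automation, assuming a standard Moses checkout (all paths and file names are hypothetical):&lt;br /&gt;
  import subprocess&lt;br /&gt;
  MOSES = '/path/to/mosesdecoder'  # hypothetical checkout location&lt;br /&gt;
  # tokenize the training corpus with the standard Moses tokenizer&lt;br /&gt;
  with open('train.en') as fin, open('train.tok.en', 'w') as fout:&lt;br /&gt;
      subprocess.run([MOSES + '/scripts/tokenizer/tokenizer.perl', '-l', 'en'],&lt;br /&gt;
                     stdin=fin, stdout=fout, check=True)&lt;br /&gt;
  # decode a test set with an already trained model&lt;br /&gt;
  with open('test.zh') as fin, open('test.out', 'w') as fout:&lt;br /&gt;
      subprocess.run([MOSES + '/bin/moses', '-f', 'train/model/moses.ini'],&lt;br /&gt;
                     stdin=fin, stdout=fout, check=True)&lt;br /&gt;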
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on the data in which the ancient text was split into single characters.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained statistical machine translation models and tested them&lt;br /&gt;
* datasets: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* processed the Songshu data&lt;br /&gt;
* read papers on CNNs&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collated experimental results&lt;br /&gt;
* compared our baseline model with Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* checked the test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* searched for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the THUMT manual and learned how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model with data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether THUMT works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. using Zizhitongjian data only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. using Zizhitongjian data only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. using Shiji and Zizhitongjian data (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. using Shiji and Zizhitongjian data (430,000 pairs), with the ancient-language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
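* for reference, character-level BLEU like the scores above can be computed with NLTK; a minimal sketch using the second example sentence (single reference, smoothed because the sentences are short):&lt;br /&gt;
  from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction&lt;br /&gt;
  # tokenize by character, matching the character-split training setup&lt;br /&gt;
  ref = list('精神过度使用就会衰竭')&lt;br /&gt;
  hyp = list('精神过度就可衰竭')&lt;br /&gt;
  references = [ [ref] ]  # one list of references per hypothesis&lt;br /&gt;
  hypotheses = [hyp]&lt;br /&gt;
  smooth = SmoothingFunction().method1&lt;br /&gt;
  print('BLEU: %.2f' % (100 * corpus_bleu(references, hypotheses, smoothing_function=smooth)))&lt;br /&gt;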
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* coded automation scripts to process data, train the model, and test it&lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested translation models using single and multiple references&lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the published information about other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jia Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jia Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jia Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jia Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-28T11:18:01Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku (优酷)&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot (图灵机器人)&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at University of Sydney&lt;br /&gt;
* '''Shiyao Li (李诗瑶)'''   :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Picked up a new task in news generation and did a literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjusted the literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Tried to reproduce the SC-LSTM work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transferred to a new task in machine translation and did a literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about the experiment:&lt;br /&gt;
  small data,&lt;br /&gt;
  1st and 2nd translators use the same training data,&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data,&lt;br /&gt;
  1st and 2nd translators use different training data, counting 22000 and 22017 pairs respectively,&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to avoid overfitting we use the model&lt;br /&gt;
                     at 3000 updates, whose BLEU is 34.96, to generate the 2nd translator's training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or different data&lt;br /&gt;
                     won't influence the 2nd translator's performance; instead, using the same data may&lt;br /&gt;
                     be better, at least judging from these results. But I have to take into account the&lt;br /&gt;
                     smaller training data size compared to yesterday's model.&lt;br /&gt;
*code the 2nd translator with constant embeddings&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data,&lt;br /&gt;
  1st and 2nd translators use the same training data,&lt;br /&gt;
  2nd translator uses '''constant untrainable embeddings''' imported from the 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                       performance due to information loss as the information flows through the network&lt;br /&gt;
                       from end to end. The decoder's smaller vocabulary size compared to the encoder's&lt;br /&gt;
                       demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a map that solves meaning shift using the 2nd&lt;br /&gt;
                       translator, but whether the map is learned is obscured by the smaller-vocabulary&lt;br /&gt;
                       phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned the LSTM model and the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about the experiment:&lt;br /&gt;
  small data,&lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English),&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embeddings'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM and GRU models&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* coded and debugged the double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size and will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debugged the double-decoder model&lt;br /&gt;
* the model performs well on the dev set but badly on the test data; I want to figure out why&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about the experiment:&lt;br /&gt;
  hidden_size = 700 (500 previously)&lt;br /&gt;
  emb_size = 510 (310 previously)&lt;br /&gt;
  small data,&lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English),&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are commonly under 43.1&lt;br /&gt;
* debugged the double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder model without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1:&lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 previously)&lt;br /&gt;
  small data,&lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English),&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2:&lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data,&lt;br /&gt;
  double-decoder model with joint loss, which means final loss = 1st decoder's loss + 2nd&lt;br /&gt;
  decoder's loss (a sketch follows at the end of this entry)&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than the 2nd decoder's output. The reason may be that&lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states, because their states are&lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY:&lt;br /&gt;
  The reason the double-decoder model without joint loss generalizes very badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training) and the beam-search mechanism (decoding)&lt;br /&gt;
  propagates and expands the error at the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train the double-decoder model without joint loss but with beam search on the 1st decoder.&lt;br /&gt;
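* a minimal TensorFlow 1.x sketch of that joint loss (all tensors and sizes here are hypothetical; the point is simply that the two decoders' losses are summed and optimized together):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  vocab = 6000  # hypothetical target vocabulary size&lt;br /&gt;
  dec1_states = tf.placeholder(tf.float32, [None, 500])  # 1st decoder hidden states&lt;br /&gt;
  dec2_states = tf.placeholder(tf.float32, [None, 500])  # 2nd decoder hidden states&lt;br /&gt;
  targets = tf.placeholder(tf.int32, [None])&lt;br /&gt;
  logits1 = tf.layers.dense(dec1_states, vocab, name='proj1')&lt;br /&gt;
  logits2 = tf.layers.dense(dec2_states, vocab, name='proj2')&lt;br /&gt;
  loss1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(&lt;br /&gt;
      labels=targets, logits=logits1))&lt;br /&gt;
  loss2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(&lt;br /&gt;
      labels=targets, logits=logits2))&lt;br /&gt;
  joint_loss = loss1 + loss2  # final loss = 1st decoder's loss + 2nd decoder's loss&lt;br /&gt;
  train_op = tf.train.AdamOptimizer(0.001).minimize(joint_loss)&lt;br /&gt;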
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*coded the double-attention one-decoder model&lt;br /&gt;
*coded the double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about the experiment:&lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data,&lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English,&lt;br /&gt;
  Chinese and English use different encoders and different attention,&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding (a sketch of the combination follows below):&lt;br /&gt;
    final_attn = attn_1 + attn_2, best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3 attn_1 + 4/3 attn_2, best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3 attn_1 + 2/3 attn_2, best result of our model: '''43.58'''&lt;br /&gt;
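* a minimal numpy sketch of the combination (all names and sizes are hypothetical): each encoder produces its own context vector via softmax attention, and the two contexts are mixed with the weights tried above:&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  def attend(query, states):&lt;br /&gt;
      # dot-product attention over one encoder's states&lt;br /&gt;
      scores = states.dot(query)&lt;br /&gt;
      weights = np.exp(scores - scores.max())&lt;br /&gt;
      weights = weights / weights.sum()&lt;br /&gt;
      return weights.dot(states)&lt;br /&gt;
  query = np.random.randn(500)          # current decoder state&lt;br /&gt;
  zh_states = np.random.randn(20, 500)  # Chinese encoder states&lt;br /&gt;
  en_states = np.random.randn(25, 500)  # machine-translated-English encoder states&lt;br /&gt;
  attn_1 = attend(query, zh_states)&lt;br /&gt;
  attn_2 = attend(query, en_states)&lt;br /&gt;
  final_attn = (4.0 / 3) * attn_1 + (2.0 / 3) * attn_2  # one of the mixes above&lt;br /&gt;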
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1:&lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data,&lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English,&lt;br /&gt;
  Chinese and English use different encoders and different attention,&lt;br /&gt;
  '''final_attn = 2/3 attn_1 + 4/3 attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2:&lt;br /&gt;
  '''final_attn = 2/3 attn_1 + 4/3 attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embeddings'''&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3:&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embeddings'''&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''', and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embeddings at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable (a sketch follows below).&lt;br /&gt;
* The quality of the English embeddings at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Prepared the big data set.&lt;br /&gt;
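* a minimal TensorFlow 1.x sketch of that constant-embedding trick (the pretrained matrix file and sizes are hypothetical): the English encoder embedding is created with trainable=False, so the optimizer leaves it untouched while everything else trains:&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  pretrained = np.load('english_embedding.npy')  # hypothetical pretrained matrix&lt;br /&gt;
  enc_emb = tf.get_variable('enc_emb', shape=pretrained.shape,&lt;br /&gt;
                            initializer=tf.constant_initializer(pretrained),&lt;br /&gt;
                            trainable=False)  # constant: excluded from training&lt;br /&gt;
  token_ids = tf.placeholder(tf.int32, [None, None])&lt;br /&gt;
  enc_inputs = tf.nn.embedding_lookup(enc_emb, token_ids)&lt;br /&gt;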
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Making only the English encoder's embeddings constant -- 45.98&lt;br /&gt;
* Only initializing the English encoder's embeddings and then finetuning them -- 46.06&lt;br /&gt;
* Sharing the attention mechanism and directly adding the two attentions -- 46.20&lt;br /&gt;
* Ran the double-attention model on the large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* The baseline BLEU on the large data is 30.83 with a '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with a '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed the APSIPA paper&lt;br /&gt;
* Took a new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing like synonym substitution, but it didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because the semantics are not captured (a toy sketch of the substitution idea follows)&lt;br /&gt;
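* the substitution idea, as a toy sketch: keep the candidate word that the language model scores highest in context (lm_score below is a hypothetical stand-in for the trained GRU language model):&lt;br /&gt;
  def lm_score(tokens):&lt;br /&gt;
      # hypothetical stand-in for the GRU LM's log-probability of the sentence&lt;br /&gt;
      return -len(set(tokens))&lt;br /&gt;
  def substitute(tokens, pos, candidates):&lt;br /&gt;
      # keep the candidate the language model likes best at this position&lt;br /&gt;
      best = max(candidates, key=lambda w: lm_score(&lt;br /&gt;
          tokens[:pos] + [w] + tokens[pos + 1:]))&lt;br /&gt;
      return tokens[:pos] + [best] + tokens[pos + 1:]&lt;br /&gt;
  print(substitute('he is a smart man'.split(), 3, ['clever', 'bright']))&lt;br /&gt;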
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the TensorFlow API&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to TensorFlow 1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by the encoder, and the decoder generates sequences from this vector (a minimal sketch follows)&lt;br /&gt;
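* a minimal TensorFlow 1.x sketch of that encoder-decoder idea (sizes hypothetical; no attention, so the whole source meaning must fit into the final encoder state):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  hidden = 256&lt;br /&gt;
  vocab = 8000&lt;br /&gt;
  enc_in = tf.placeholder(tf.float32, [None, None, 128])  # embedded source&lt;br /&gt;
  dec_in = tf.placeholder(tf.float32, [None, None, 128])  # embedded shifted target&lt;br /&gt;
  with tf.variable_scope('encoder'):&lt;br /&gt;
      _, state = tf.nn.dynamic_rnn(tf.nn.rnn_cell.GRUCell(hidden), enc_in,&lt;br /&gt;
                                   dtype=tf.float32)&lt;br /&gt;
  with tf.variable_scope('decoder'):  # conditioned only on the fixed-length state&lt;br /&gt;
      outputs, _ = tf.nn.dynamic_rnn(tf.nn.rnn_cell.GRUCell(hidden), dec_in,&lt;br /&gt;
                                     initial_state=state)&lt;br /&gt;
  logits = tf.layers.dense(outputs, vocab)&lt;br /&gt;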
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to TensorFlow 1.0 (on the server)&lt;br /&gt;
* installed TensorFlow 0.1 and TensorFlow 1.0 on my PC and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the TensorFlow API&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to TensorFlow 1.0 (on the server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to TensorFlow 1.0 (on the server)&lt;br /&gt;
* accomplished this task&lt;br /&gt;
* found the new version saves more time, has lower perplexity, and gets better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran two versions of the code on the small data set (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found that version 1.0 of the code saves more training time and has lower perplexity, and that the two versions have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good even when the model is overfitting&lt;br /&gt;
* reason: on the small data set, the test set and the training set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran two versions of the code on the big data set (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the TensorFlow 0.1 version leads to exhaustion of memory&lt;br /&gt;
* I tried many times (re-entering the same command), and version 0.1 eventually worked; a session-config sketch that can mitigate this follows below&lt;br /&gt;
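* a minimal TensorFlow 1.x sketch of the session configuration that can mitigate such OOM errors (whether this was the actual fix here is not recorded):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  config = tf.ConfigProto()&lt;br /&gt;
  config.gpu_options.allow_growth = True  # allocate GPU memory on demand&lt;br /&gt;
  # alternatively, cap the fraction of GPU memory this process may use:&lt;br /&gt;
  # config.gpu_options.per_process_gpu_memory_fraction = 0.9&lt;br /&gt;
  sess = tf.Session(config=config)&lt;br /&gt;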
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the WMT2014 data set&lt;br /&gt;
* ran the code on the English-French data set and found the translation quality was poor&lt;br /&gt;
* reason: no data preprocessing had been done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time&lt;br /&gt;
* found these two versions have similar perplexity and BLEU values&lt;br /&gt;
* found that the BLEU is still good even when the model is overfitting&lt;br /&gt;
* (reason: the test set and the training set of the small data set are similar in content and style)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (Out Of Memory) error occurred when version 0.1 was trained on the large data set, but version 1.0 worked&lt;br /&gt;
    reason: improper resource allocation by the TensorFlow 0.1 framework exhausts memory&lt;br /&gt;
* I tried 4 times (just entering the same command again), and version 0.1 then worked&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time&lt;br /&gt;
* found these two versions have similar perplexity and BLEU values&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the WMT2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time&lt;br /&gt;
* found these two versions have similar perplexity and BLEU values&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented NMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented NMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented NMT code&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented NMT code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single characters&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other seq2seq models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learned Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained a statistical machine translation model and tested it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* studied TensorFlow&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* coded automation scripts to process data, train the model, and test it&lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on the data in which the ancient text was split into single characters.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained statistical machine translation models and tested them&lt;br /&gt;
* datasets: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* processed the Songshu data&lt;br /&gt;
* read papers on CNNs&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collated experimental results&lt;br /&gt;
* compared our baseline model with Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* checked the test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* searched for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the THUMT manual and learned how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model with data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether THUMT works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. using Zizhitongjian data only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. using Zizhitongjian data only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. using Shiji and Zizhitongjian data (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. using Shiji and Zizhitongjian data (430,000 pairs), with the ancient-language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* coded automation scripts to process data, train the model, and test it&lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested translation models using single and multiple references&lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the published information about other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jia Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jia Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jia Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jia Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28</id>
		<title>NLP Status Report 2017-8-28</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28"/>
				<updated>2017-08-28T06:42:04Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document[http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* read the published information about other NMT toolkits&lt;br /&gt;
* cleaned up the code&lt;br /&gt;
* wrote the documents&lt;br /&gt;
||  &lt;br /&gt;
* write the paper on our baseline system&lt;br /&gt;
* read memory-augmented NMT code&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* learned the source code of the model&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* learn the source code of the seq2seq model and learn TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28</id>
		<title>NLP Status Report 2017-8-28</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28"/>
				<updated>2017-08-28T05:04:28Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date!!People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;7&amp;quot;|2017.8.28&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|zhangshuai &lt;br /&gt;
||  &lt;br /&gt;
* learn model source code&lt;br /&gt;
||  &lt;br /&gt;
* learn tensorflow and source code&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28</id>
		<title>NLP Status Report 2017-8-28</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28"/>
				<updated>2017-08-28T05:03:25Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date!!People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;7&amp;quot;|2017.8.28&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|guojiayu &lt;br /&gt;
||  &lt;br /&gt;
* learn the tensorflow&lt;br /&gt;
||  &lt;br /&gt;
* understand the source code&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28</id>
		<title>NLP Status Report 2017-8-28</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28"/>
				<updated>2017-08-28T05:02:56Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：以“{| class=&amp;quot;wikitable&amp;quot; !Date!!People !! Last Week !! This Week |- | rowspan=&amp;quot;7&amp;quot;|2017.8.28  |- |guojiayu  ||   * learn the tensorflow ||   *  |-  |}”为内容创建页面&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date!!People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;7&amp;quot;|2017.8.28&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|guojiayu &lt;br /&gt;
||  &lt;br /&gt;
* learn the tensorflow&lt;br /&gt;
||  &lt;br /&gt;
* &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-27T09:46:57Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use different training data, containing 22,000 and 22,017 sentence pairs respectively&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to avoid overfitting we use the&lt;br /&gt;
                     model at 3000 updates, whose BLEU is 34.96, to generate the 2nd&lt;br /&gt;
                     translator's training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or different&lt;br /&gt;
                    data won't influence the 2nd translator's performance; if anything, using the&lt;br /&gt;
                     same data may be better, judging from the results. But I have to take into&lt;br /&gt;
                     account the smaller training set compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder (sketched below)&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                      performance due to information loss as the information flows through the network&lt;br /&gt;
                      from end to end. The decoder's smaller vocabulary size compared to the encoder's&lt;br /&gt;
                      demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a map that solves meaning shift using the 2nd&lt;br /&gt;
                      translator, but whether the map is learned or not is obscured by the smaller&lt;br /&gt;
                      vocabulary phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
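* a minimal sketch of the constant-embedding setup, assuming TensorFlow 1.x; pretrained_emb and token_ids are hypothetical names for the array exported from the 1st translator and the input ids:&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  # load the 1st translator's trained embedding and freeze it&lt;br /&gt;
  emb = tf.get_variable('target_emb', initializer=pretrained_emb, trainable=False)&lt;br /&gt;
  inputs = tf.nn.embedding_lookup(emb, token_ids)&lt;br /&gt;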
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM and seq2seq models &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM and GRU models (update rules sketched below)&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
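* a minimal numpy sketch of the two update rules, my own illustration rather than the project code (parameter names are hypothetical); the GRU keeps two gates and no cell state, the LSTM keeps three gates plus a cell state:&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  def sigmoid(x):&lt;br /&gt;
      return 1.0 / (1.0 + np.exp(-x))&lt;br /&gt;
  def gru_step(x, h, W, U, b):&lt;br /&gt;
      Wz, Wr, Wh = W; Uz, Ur, Uh = U; bz, br, bh = b&lt;br /&gt;
      z = sigmoid(x @ Wz + h @ Uz + bz)              # update gate&lt;br /&gt;
      r = sigmoid(x @ Wr + h @ Ur + br)              # reset gate&lt;br /&gt;
      h_new = np.tanh(x @ Wh + (r * h) @ Uh + bh)    # candidate state&lt;br /&gt;
      return z * h + (1 - z) * h_new                 # no separate cell state&lt;br /&gt;
  def lstm_step(x, h, c, W, U, b):&lt;br /&gt;
      Wf, Wi, Wo, Wc = W; Uf, Ui, Uo, Uc = U; bf, bi, bo, bc = b&lt;br /&gt;
      f = sigmoid(x @ Wf + h @ Uf + bf)              # forget gate&lt;br /&gt;
      i = sigmoid(x @ Wi + h @ Ui + bi)              # input gate&lt;br /&gt;
      o = sigmoid(x @ Wo + h @ Uo + bo)              # output gate&lt;br /&gt;
      c = f * c + i * np.tanh(x @ Wc + h @ Uc + bc)  # cell state&lt;br /&gt;
      return o * np.tanh(c), c&lt;br /&gt;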
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size and will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the dev set but badly on the test set; I want to figure out why&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder model without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, i.e. final loss = 1st decoder's loss + 2nd decoder's&lt;br /&gt;
  loss (sketched below)&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason the double-decoder model without joint loss generalizes so badly is that the gap&lt;br /&gt;
  between teacher forcing (the training process) and beam search (the decoding process)&lt;br /&gt;
  propagates and amplifies errors toward the output end, which breaks the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
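* a minimal sketch of the joint loss, assuming TensorFlow 1.x; xent_decoder_1 and xent_decoder_2 are hypothetical names for the two decoders' per-token cross-entropies:&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  loss_1 = tf.reduce_mean(xent_decoder_1)   # 1st decoder's loss&lt;br /&gt;
  loss_2 = tf.reduce_mean(xent_decoder_2)   # 2nd decoder's loss&lt;br /&gt;
  joint_loss = loss_1 + loss_2              # final loss used for training&lt;br /&gt;
  train_op = tf.train.AdamOptimizer(0.001).minimize(joint_loss)&lt;br /&gt;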
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''', and it seems more stable (combination sketched below)&lt;br /&gt;
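* a one-line sketch of how the two attention read-outs are combined; attn_1 (Chinese encoder) and attn_2 (translated-English encoder) are hypothetical names:&lt;br /&gt;
  final_attn = w1 * attn_1 + w2 * attn_2   # (w1, w2) = (1, 1), (2/3, 4/3) or (4/3, 2/3) as above&lt;br /&gt;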
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the encoder's English word embedding constant and training all the other embeddings and parameters achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Prepared the big data set. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing via synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words (shape of the model sketched below)&lt;br /&gt;
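* a minimal sketch of such a next-word language model, assuming TensorFlow 1.x; input_emb, targets and vocab_size are hypothetical names:&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  cell = tf.contrib.rnn.GRUCell(512)&lt;br /&gt;
  outputs, _ = tf.nn.dynamic_rnn(cell, input_emb, dtype=tf.float32)&lt;br /&gt;
  logits = tf.layers.dense(outputs, vocab_size)    # next-word distribution&lt;br /&gt;
  loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(&lt;br /&gt;
      labels=targets, logits=logits))&lt;br /&gt;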
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because semantics was not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to TensorFlow 1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem (shape of the model sketched below)&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
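* a minimal sketch of that encoder-decoder shape, assuming TensorFlow 1.x; src_emb, tgt_emb and vocab_size are hypothetical names:&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  enc_cell = tf.contrib.rnn.GRUCell(256)&lt;br /&gt;
  dec_cell = tf.contrib.rnn.GRUCell(256)&lt;br /&gt;
  # the encoder compresses the source into a fixed-length state&lt;br /&gt;
  _, enc_state = tf.nn.dynamic_rnn(enc_cell, src_emb, dtype=tf.float32, scope='enc')&lt;br /&gt;
  # the decoder generates conditioned on that state&lt;br /&gt;
  dec_out, _ = tf.nn.dynamic_rnn(dec_cell, tgt_emb, initial_state=enc_state, scope='dec')&lt;br /&gt;
  logits = tf.layers.dense(dec_out, vocab_size)&lt;br /&gt;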
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to TensorFlow 1.0 (on the server); typical API renames sketched below&lt;br /&gt;
* installed TensorFlow 0.1 and TensorFlow 1.0 on my PC and debugged ViVi_NMT&lt;br /&gt;
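* a sketch of the usual TensorFlow 0.x-to-1.0 renames such an upgrade hits; the actual diffs in ViVi_NMT may differ:&lt;br /&gt;
  # tf.concat(dim, values)         becomes  tf.concat(values, axis)&lt;br /&gt;
  # tf.mul / tf.sub / tf.neg       become   tf.multiply / tf.subtract / tf.negative&lt;br /&gt;
  # tf.initialize_all_variables()  becomes  tf.global_variables_initializer()&lt;br /&gt;
  # tf.nn.seq2seq.*                becomes  tf.contrib.legacy_seq2seq.*&lt;br /&gt;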
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq models, with and without attention, didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to TensorFlow 1.0 (on the server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to TensorFlow 1.0 (on the server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves time, has lower perplexity, and gets better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran the two versions of the code on the small data set (Chinese-English)&lt;br /&gt;
* tested the checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves training time and has lower perplexity, and the two versions get similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: on the small data set, the test set and training set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran the two versions of the code on the big data set (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when the version 0.1 code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the TensorFlow 0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code is about 0.06 seconds per step faster than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the WMT2014 data set&lt;br /&gt;
* ran the code on the English-French data set and found the translations are not good&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found the two versions have similar perplexity and BLEU values &lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* (reason: the test set and training set of the small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (out of memory) error occurred when version 0.1 was trained on the large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper resource allocation by the TensorFlow 0.1 framework leads to exhaustion of memory resources &lt;br /&gt;
* I tried 4 times (just entering the same command), and version 0.1 eventually worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found the two versions have similar perplexity and BLEU values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the WMT2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU (formula sketched below).&lt;br /&gt;
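* a minimal sketch of the BLEU score from Papineni et al. (2002); p holds the modified n-gram precisions p1..p4, c and r are the candidate and reference lengths:&lt;br /&gt;
  import math&lt;br /&gt;
  def bleu(p, c, r):&lt;br /&gt;
      bp = math.exp(min(0.0, 1.0 - float(r) / c))    # brevity penalty&lt;br /&gt;
      return bp * math.exp(sum(0.25 * math.log(pn) for pn in p))&lt;br /&gt;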
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism (one decoder step sketched below).&lt;br /&gt;
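* a minimal numpy sketch of Bahdanau-style additive attention for one decoder step; s is the previous decoder state, H the encoder states, and v, Ws, Uh learned parameters (all names hypothetical):&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  def attention(s, H, v, Ws, Uh):&lt;br /&gt;
      scores = np.tanh(s @ Ws + H @ Uh) @ v     # e_j for every source position&lt;br /&gt;
      weights = np.exp(scores - scores.max())&lt;br /&gt;
      weights /= weights.sum()                  # a_j = softmax(e_j)&lt;br /&gt;
      return weights @ H                        # context = sum_j a_j * h_j&lt;br /&gt;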
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found the two versions have similar perplexity and BLEU values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single words&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found the two versions have similar perplexity and BLEU values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU) of other models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU) of other seq2seq models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learned Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained a statistical machine translation model and tested it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* studied TensorFlow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* wrote automation scripts to process data, train the model, and test it &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on data whose ancient text was split into single characters (splitting sketched below).&lt;br /&gt;
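* a minimal sketch of the character splitting; split_chars is a hypothetical helper, not the project code:&lt;br /&gt;
  def split_chars(line):&lt;br /&gt;
      # turn an ancient-Chinese sentence into space-separated single characters&lt;br /&gt;
      return ' '.join(ch for ch in line.strip() if not ch.isspace())&lt;br /&gt;
  # split_chars('神大用则竭') == '神 大 用 则 竭'&lt;br /&gt;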
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained statistical machine translation models and tested them &lt;br /&gt;
* datasets: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* processed the Songshu data&lt;br /&gt;
* read papers on CNNs &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* searched for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the THUMT manual and learned how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model on data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether THUMT works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. Using Zizhitongjian data only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. Using Zizhitongjian data only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient text split into single characters, we get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* wrote automation scripts to process data, train the model, and test it &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested translation models using single and multiple references &lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released documentation of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jia Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jia Guo || 9:10 || 23:00 || 9.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jia Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jia Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-22T13:57:34Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use different training data, containing 22,000 and 22,017 sentence pairs respectively&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to avoid overfitting we use the&lt;br /&gt;
                     model at 3000 updates, whose BLEU is 34.96, to generate the 2nd&lt;br /&gt;
                     translator's training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or different&lt;br /&gt;
                    data won't influence the 2nd translator's performance; if anything, using the&lt;br /&gt;
                     same data may be better, judging from the results. But I have to take into&lt;br /&gt;
                     account the smaller training set compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                      performance due to information loss as the information flows through the network&lt;br /&gt;
                      from end to end. The decoder's smaller vocabulary size compared to the encoder's&lt;br /&gt;
                      demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a map that solves meaning shift using the 2nd&lt;br /&gt;
                      translator, but whether the map is learned or not is obscured by the smaller&lt;br /&gt;
                      vocabulary phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM and seq2seq models &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM and GRU models&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size and will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the development set but badly on the test data; I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are generally under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, i.e. final loss = 1st decoder's loss + 2nd decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why the double-decoder without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training process) and the beam-search mechanism (decoding process)&lt;br /&gt;
  propagates and expands errors toward the output end, which destroys the model when decoding; a joint-loss sketch follows below.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
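*A minimal joint-loss sketch (assuming TensorFlow 1.x; logits_1, logits_2, targets and weights are assumed to come from the two decoders' graphs):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  # final loss = 1st decoder's loss + 2nd decoder's loss&lt;br /&gt;
  loss_1 = tf.contrib.seq2seq.sequence_loss(logits_1, targets, weights)&lt;br /&gt;
  loss_2 = tf.contrib.seq2seq.sequence_loss(logits_2, targets, weights)&lt;br /&gt;
  joint_loss = loss_1 + loss_2&lt;br /&gt;
  # one optimizer step now trains both decoders against the references&lt;br /&gt;
  train_op = tf.train.AdamOptimizer(0.001).minimize(joint_loss)&lt;br /&gt;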
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2''' (see the sketch below)&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
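*A sketch of the attention combination above (alpha is the reweighting knob; alpha = 1 recovers attn_1 + attn_2):&lt;br /&gt;
  def combine_attention(attn_1, attn_2, alpha=1.0):&lt;br /&gt;
      # attn_1: context vector from the Chinese encoder&lt;br /&gt;
      # attn_2: context vector from the machine-translated-English encoder&lt;br /&gt;
      # alpha = 2/3 gives 2/3*attn_1 + 4/3*attn_2; alpha = 4/3 gives the reverse&lt;br /&gt;
      return alpha * attn_1 + (2.0 - alpha) * attn_2&lt;br /&gt;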
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, and training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing such as synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words (see the sketch below)&lt;br /&gt;
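*A minimal GRU language-model sketch (assuming TensorFlow 1.x; all sizes are illustrative), where a substitution counts as similar if it keeps the sentence's LM loss low:&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  vocab_size = 10000  # assumed vocabulary size&lt;br /&gt;
  ids = tf.placeholder(tf.int32, [None, None])     # [batch, time]&lt;br /&gt;
  mask = tf.placeholder(tf.float32, [None, None])  # padding mask for the targets&lt;br /&gt;
  emb = tf.get_variable('emb', [vocab_size, 128])&lt;br /&gt;
  inp = tf.nn.embedding_lookup(emb, ids[:, :-1])&lt;br /&gt;
  out, _ = tf.nn.dynamic_rnn(tf.nn.rnn_cell.GRUCell(256), inp, dtype=tf.float32)&lt;br /&gt;
  logits = tf.layers.dense(out, vocab_size)&lt;br /&gt;
  loss = tf.contrib.seq2seq.sequence_loss(logits, ids[:, 1:], mask)&lt;br /&gt;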
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because semantics are not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector (a minimal skeleton follows below)&lt;br /&gt;
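*A minimal skeleton of that idea (assuming TensorFlow 1.x, no attention; src_emb and tgt_emb are assumed embedded inputs):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  # the encoder's final state is the fixed-length vector that carries the semantics&lt;br /&gt;
  _, enc_state = tf.nn.dynamic_rnn(tf.nn.rnn_cell.GRUCell(256), src_emb,&lt;br /&gt;
                                   dtype=tf.float32, scope='encoder')&lt;br /&gt;
  # the decoder is initialized from that vector and generates the target sequence&lt;br /&gt;
  dec_out, _ = tf.nn.dynamic_rnn(tf.nn.rnn_cell.GRUCell(256), tgt_emb,&lt;br /&gt;
                                 initial_state=enc_state, scope='decoder')&lt;br /&gt;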
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity, and gets a better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has lower complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on the small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the tensorflow0.1 version exhausts memory&lt;br /&gt;
* I tried many times, and version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saves 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation quality is not good&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* (reason: the test set and the training set of the small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (Out Of Memory) error occurred when version 0.1 was trained on the large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper resource allocation by the tensorflow0.1 framework exhausts memory &lt;br /&gt;
* I tried 4 times (just entering the same command), and version 0.1 then worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single characters (see the sketch below)&lt;br /&gt;
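*A sketch of that splitting step (the ASCII-token handling is an assumption):&lt;br /&gt;
  def split_chars(line):&lt;br /&gt;
      # split CJK tokens into single characters, keep ASCII tokens whole&lt;br /&gt;
      tokens = []&lt;br /&gt;
      for tok in line.strip().split():&lt;br /&gt;
          if all(ord(ch) &amp;gt; 127 for ch in tok):&lt;br /&gt;
              tokens.extend(tok)&lt;br /&gt;
          else:&lt;br /&gt;
              tokens.append(tok)&lt;br /&gt;
      return ' '.join(tokens)&lt;br /&gt;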
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other seq2seq models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train a statistical machine translation model and test it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* test whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data, train the model, and test the model (see the sketch below)&lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
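*A heavily simplified sketch of such a script (paths, files and language pair are hypothetical; a real Moses run also needs tokenized data on both sides and a language model):&lt;br /&gt;
  import subprocess&lt;br /&gt;
  MOSES = '/path/to/mosesdecoder'  # hypothetical install location&lt;br /&gt;
  def run(cmd):&lt;br /&gt;
      subprocess.check_call(cmd, shell=True)&lt;br /&gt;
  # tokenize, train, decode, score with the standard Moses scripts&lt;br /&gt;
  run(MOSES + '/scripts/tokenizer/tokenizer.perl -l en &amp;lt; train.en &amp;gt; train.tok.en')&lt;br /&gt;
  run(MOSES + '/scripts/training/train-model.perl -root-dir work -corpus train -f zh -e en')&lt;br /&gt;
  run(MOSES + '/bin/moses -f work/model/moses.ini &amp;lt; test.zh &amp;gt; test.out')&lt;br /&gt;
  run(MOSES + '/scripts/generic/multi-bleu.perl ref.en &amp;lt; test.out')&lt;br /&gt;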
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run the model on data whose ancient-language side was split into single characters.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test them &lt;br /&gt;
* dataset: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
* The main factor now is the data (including the number of sentence pairs and their quality), because the modern-language text includes context information.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* search for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* test whether THUMT works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* checkpoint-100000 translation model&lt;br /&gt;
* BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. With data from Zizhitongjian only (6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2. With data from Zizhitongjian only (12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3. With data from Shiji and Zizhitongjian (430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4. With data from Shiji and Zizhitongjian (430,000 pairs), splitting the ancient-language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models using single references and multiple references (see the BLEU sketch below)&lt;br /&gt;
* organize all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
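*A minimal multi-reference BLEU sketch (using NLTK as an illustration; the token lists are made up):&lt;br /&gt;
  from nltk.translate.bleu_score import corpus_bleu&lt;br /&gt;
  # one list of reference token lists per hypothesis sentence&lt;br /&gt;
  refs = [[['the', 'cat', 'sat'], ['a', 'cat', 'was', 'sitting']]]&lt;br /&gt;
  hyps = [['the', 'cat', 'sat']]&lt;br /&gt;
  print(corpus_bleu(refs, hyps))  # single-reference scoring just shortens the inner list&lt;br /&gt;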
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:10 || 23:00 || 9.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-22T00:29:01Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot (图灵机器人)&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person !! start !! leave !! hours !! status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator use the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator use different training data, containing 22000 and 22017 sentence pairs respectively, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to prevent overfitting we use the model at 3000 updates, whose BLEU is 34.96, to generate the 2nd translator's training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or different data won't influence the 2nd translator's performance; if anything, using the same data may be better, at least judging from these results. But I have to take into account the smaller training data size compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from the 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will impair the final performance, due to information loss as the information flows through the network from end to end. The decoder's smaller vocabulary size compared to the encoder's (9000+ -&amp;gt; 6000+) demonstrates this.&lt;br /&gt;
  The intention of this experiment was to find a mapping that solves meaning shift using the 2nd translator, but whether the mapping is learned is obscured by the smaller-vocabulary phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learn the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between the LSTM model and the GRU model&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter the 2017/05/14 model's size and will try it after the NIPS deadline&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train the double-decoder model on the small data set but encounter decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the development set but badly on the test data; I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are generally under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, i.e. final loss = 1st decoder's loss + 2nd decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why the double-decoder without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training process) and the beam-search mechanism (decoding process)&lt;br /&gt;
  propagates and expands errors toward the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, and training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing such as synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because semantics are not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity, and gets a better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has lower complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on the small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the tensorflow0.1 version exhausts memory&lt;br /&gt;
* I tried many times, and version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saves 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation quality is not good&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that BLEU stays good even when the model overfits&lt;br /&gt;
* (reason: the test set and training set of the small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (out-of-memory) error occurred when version 0.1 was trained on the large data set, while version 1.0 worked &lt;br /&gt;
    reason: improper resource allocation by the tensorflow0.1 framework exhausts memory &lt;br /&gt;
* I tried 4 times (just re-entering the same command), and version 0.1 eventually worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on attention mechanisms.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the reported performance (BLEU) of other models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the reported performance (BLEU) of other seq2seq models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learned Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained a statistical machine translation model and tested it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* studied TensorFlow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* wrote automation scripts to process data, train the model, and test it &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
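* the driver has roughly the shape sketched below (paths and train-model.perl arguments are illustrative, not our exact script)&lt;br /&gt;
  import subprocess&lt;br /&gt;
  def run(cmd):&lt;br /&gt;
      print(' '.join(cmd))&lt;br /&gt;
      subprocess.run(cmd, check=True)&lt;br /&gt;
  # word alignment, phrase extraction and model estimation in one Moses call&lt;br /&gt;
  run(['perl', 'mosesdecoder/scripts/training/train-model.perl',&lt;br /&gt;
       '-root-dir', 'work', '-corpus', 'corpus/train', '-f', 'zh', '-e', 'en',&lt;br /&gt;
       '-alignment', 'grow-diag-final-and', '-lm', '0:3:lm/train.en.arpa'])&lt;br /&gt;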
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on data whose ancient-language side was split into single characters.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained statistical machine translation models and tested them &lt;br /&gt;
* datasets: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* processed the Songshu data&lt;br /&gt;
* read papers on CNNs &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collated experimental results&lt;br /&gt;
* compared our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* tested results.&lt;br /&gt;
* the main limiting factor now is the data, both the number of sentence pairs and their quality, since the modern-language text includes context information&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphic model of LSTM-Projected BPTT&lt;br /&gt;
* searched for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the THUMT manual and learned how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model on data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether THUMT works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient-language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
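*For reference, character-level BLEU of this kind can be computed as sketched below (assuming NLTK; the scores above may come from a different scorer such as multi-bleu.perl)&lt;br /&gt;
  from nltk.translate.bleu_score import corpus_bleu&lt;br /&gt;
  # modern_refs / model_outputs are hypothetical lists of sentence strings&lt;br /&gt;
  refs = [[list(ref)] for ref in modern_refs]  # list(s) splits a string into characters&lt;br /&gt;
  hyps = [list(hyp) for hyp in model_outputs]&lt;br /&gt;
  print('BLEU: %.2f' % (100 * corpus_bleu(refs, hyps)))&lt;br /&gt;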
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* wrote automation scripts to process data, train models, and test them &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested translation models using single and multiple references &lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information on other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learned TensorFlow&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T11:24:21Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, of 22,000 and 22,017 sentences respectively&lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use the model at 3000 updates, to&lt;br /&gt;
                     prevent overfitting, to generate the 2nd translator's training data; its&lt;br /&gt;
                     BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or different&lt;br /&gt;
                    data won't influence the 2nd translator's performance; if anything, using the same data may&lt;br /&gt;
                     be better, at least judging from these results. But I have to take into account the smaller&lt;br /&gt;
                     training set compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                      performance, due to information loss as the information flows through the network from&lt;br /&gt;
                      end to end. The decoder's smaller vocabulary size compared with the encoder's demonstrates&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a map that solves meaning shift using the 2nd translator,&lt;br /&gt;
                      but whether that map is learned or not is obscured by the smaller-vocabulary&lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM model and the GRU model&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
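* the contrast in a nutshell, as a sketch assuming TF 1.0 (where the cell classes live in tf.contrib.rnn): an LSTM carries a separate cell state alongside its output and uses three gates, while a GRU keeps a single state with two gates&lt;br /&gt;
  import tensorflow as tf  # assumes TensorFlow 1.0&lt;br /&gt;
  inputs = tf.placeholder(tf.float32, [None, None, 128])  # [batch, time, input_dim]&lt;br /&gt;
  lstm = tf.contrib.rnn.BasicLSTMCell(256)  # state = (c, h); input/forget/output gates&lt;br /&gt;
  gru = tf.contrib.rnn.GRUCell(256)         # single state; update/reset gates, fewer parameters&lt;br /&gt;
  lstm_out, _ = tf.nn.dynamic_rnn(lstm, inputs, dtype=tf.float32, scope='lstm')&lt;br /&gt;
  gru_out, _ = tf.nn.dynamic_rnn(gru, inputs, dtype=tf.float32, scope='gru')&lt;br /&gt;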
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size; will try it after the NIPS deadline&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the dev set but badly on the test set; I want to figure out why&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why the double-decoder without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training process) and the beam-search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error at the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
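*the combination amounts to a weighted sum of the two context vectors, roughly as below (names are illustrative)&lt;br /&gt;
  def combine_contexts(attn_1, attn_2, w1=1.0, w2=1.0):&lt;br /&gt;
      # attn_1: context from the Chinese encoder; attn_2: from the machine-English encoder&lt;br /&gt;
      return w1 * attn_1 + w2 * attn_2&lt;br /&gt;
  # decoding variants above: (1, 1), (2/3, 4/3) and (4/3, 2/3)&lt;br /&gt;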
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, and training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Making only the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Initializing the English encoder's embedding and then finetuning it -- 46.06&lt;br /&gt;
* Sharing the attention mechanism and directly adding the two contexts -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
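* the constant vs. finetuned variants differ only in the trainable flag, roughly as below (names are illustrative; pretrained stands for the imported embedding matrix)&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  # constant: excluded from gradient updates&lt;br /&gt;
  emb_const = tf.get_variable('enc_emb_const', initializer=pretrained, trainable=False)&lt;br /&gt;
  # finetuned: initialized from the same matrix, then trained further&lt;br /&gt;
  emb_tuned = tf.get_variable('enc_emb_tuned', initializer=pretrained, trainable=True)&lt;br /&gt;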
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on the large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing-style synonym substitution, but it didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because semantics are not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves training time and achieves lower complexity and better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran both versions of the code on the small Chinese-English data set&lt;br /&gt;
* tested the resulting checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found that version 1.0 of the code saves training time and has lower complexity, while the two versions reach similar BLEU values&lt;br /&gt;
* found that BLEU stays good even when the model overfits&lt;br /&gt;
* reason: on the small data set, the test set and training set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran both versions of the code on the big Chinese-English data set&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, while version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the tensorflow0.1 version exhausts memory&lt;br /&gt;
* after retrying several times, version 0.1 eventually ran&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code ran about 0.06 seconds per step faster than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* ran the code on the English-French data set and found the translations were poor&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that BLEU stays good even when the model overfits&lt;br /&gt;
* (reason: the test set and training set of the small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (out-of-memory) error occurred when version 0.1 was trained on the large data set, while version 1.0 worked &lt;br /&gt;
    reason: improper resource allocation by the tensorflow0.1 framework exhausts memory &lt;br /&gt;
* I tried 4 times (just re-entering the same command), and version 0.1 eventually worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on attention mechanisms.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the reported performance (BLEU) of other models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the reported performance (BLEU) of other seq2seq models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learned Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained a statistical machine translation model and tested it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* studied TensorFlow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* wrote automation scripts to process data, train the model, and test it &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on data whose ancient-language side was split into single characters.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained statistical machine translation models and tested them &lt;br /&gt;
* datasets: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* processed the Songshu data&lt;br /&gt;
* read papers on CNNs &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collated experimental results&lt;br /&gt;
* compared our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the THUMT manual and learned how to use it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* tested results.&lt;br /&gt;
* the main limiting factor now is the data, both the number of sentence pairs and their quality, since the modern-language text includes context information&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* tested whether THUMT works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphic model of LSTM-Projected BPTT&lt;br /&gt;
* searched for data available for translation (Twenty-four-Shi)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model on data including Shiji and Zizhitongjian.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient-language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* wrote automation scripts to process data, train models, and test them &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested translation models using single and multiple references &lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T11:22:52Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, containing 22,000 and 22,017 sentence pairs respectively,&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to prevent overfitting we use the&lt;br /&gt;
                     model at 3000 updates, whose BLEU is 34.96, to generate the 2nd translator's&lt;br /&gt;
                      training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or different&lt;br /&gt;
                     data won't influence the 2nd translator's performance; if anything, using the same data may&lt;br /&gt;
                      be better, at least judging from these results. But I have to take into account the smaller&lt;br /&gt;
                      training set compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses a '''constant untrainable embedding''' imported from the 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                      performance due to information loss as the information flows through the network from&lt;br /&gt;
                      end to end. The decoder's smaller vocabulary size compared to the encoder's demonstrates&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a map that solves meaning shift using the 2nd translator,&lt;br /&gt;
                      but whether that map is learned or not is obscured by the smaller-vocabulary&lt;br /&gt;
                      phenomenon. (A sketch of the constant-embedding trick appears after this table.)&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between the LSTM model and the GRU model&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size; will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on the small data set but encounter decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the development set but badly on the test data; I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder model without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, meaning the final loss = 1st decoder's loss + 2nd&lt;br /&gt;
  decoder's loss (see the joint-loss sketch after this table)&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why the double-decoder model without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training process) and the beam-search mechanism (decoding process)&lt;br /&gt;
  propagates and amplifies the error toward the output end, which breaks the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2''' (see the attention-mixing sketch after this table)&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-edit-style synonym substitution, but it didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to TensorFlow 1.0 (typical API renames are sketched after this table)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity, and gets a better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on the small data set (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has lower complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: the test set and training set of the small data set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the tensorflow0.1 version exhausts the memory&lt;br /&gt;
* I tried many times, and version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation quality is not good&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* (reason: the test set and the training set of the small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (out of memory) error occurred when version 0.1 was trained on the large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper resource allocation by the tensorflow0.1 framework exhausts the memory &lt;br /&gt;
* I had tried 4 times (just entering the same command again), and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of the en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented NMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented NMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented NMT code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* processed documents &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented NMT code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of the en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and BLEU values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other seq2seq models&lt;br /&gt;
* datasets: WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train a statistical machine translation model and test it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* test whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data, train the model, and test the model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run the model with data whose ancient-language side was split into single characters (see the splitting sketch after this table)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test them &lt;br /&gt;
* datasets: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* processed the Songshu data&lt;br /&gt;
* read papers on CNNs &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
* The main limiting factor now is the data, both the number of sentence pairs and their quality, since the modern-language text includes contextual information.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* test whether THUMT works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* search for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run the model with data including Shiji and Zizhitongjian.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. using data from Zizhitongjian only (6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2. using data from Zizhitongjian only (12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3. using data from Shiji and Zizhitongjian (430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4. using data from Shiji and Zizhitongjian (430,000 pairs), with the ancient-language text split character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data, train the model, and test the model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models using single and multiple references (see the BLEU sketch after this table)&lt;br /&gt;
* organize all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
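&lt;br /&gt;
The single- vs. multiple-reference BLEU testing in the 2017/08/18 row can be reproduced with standard tooling. Below is a minimal sketch using NLTK's corpus_bleu; the file names and the number of references are illustrative assumptions, not part of the original setup.&lt;br /&gt;
&lt;br /&gt;
 # Minimal multi-reference BLEU sketch (assumes NLTK is installed;&lt;br /&gt;
 # file names are hypothetical placeholders).&lt;br /&gt;
 from nltk.translate.bleu_score import corpus_bleu&lt;br /&gt;
 &lt;br /&gt;
 def read_tokens(path):&lt;br /&gt;
     with open(path, encoding='utf-8') as f:&lt;br /&gt;
         return [line.split() for line in f]&lt;br /&gt;
 &lt;br /&gt;
 hyps = read_tokens('test.trans')           # system outputs, one per line&lt;br /&gt;
 ref_sets = [read_tokens('test.ref%d' % i)  # one file per reference&lt;br /&gt;
             for i in range(4)]&lt;br /&gt;
 # corpus_bleu expects, for each hypothesis, the list of its references&lt;br /&gt;
 refs = [list(rs) for rs in zip(*ref_sets)]&lt;br /&gt;
 print('BLEU: %.2f' % (100 * corpus_bleu(refs, hyps)))&lt;br /&gt;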
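&lt;br /&gt;
The constant-embedding experiments (2017/05/11 and 2017/05/31 rows) freeze an embedding table imported from the 1st translator. A minimal TensorFlow 1.x sketch of that trick, assuming the trained table was exported to a NumPy file (the file name, variable names, and shapes are hypothetical):&lt;br /&gt;
&lt;br /&gt;
 # Sketch: load a trained embedding as a constant (untrainable) table.&lt;br /&gt;
 import numpy as np&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
 &lt;br /&gt;
 pretrained = np.load('first_translator_decoder_emb.npy')  # hypothetical file&lt;br /&gt;
 &lt;br /&gt;
 # trainable=False freezes the table; the rest of the model still trains.&lt;br /&gt;
 embedding = tf.get_variable(&lt;br /&gt;
     'emb', shape=pretrained.shape,&lt;br /&gt;
     initializer=tf.constant_initializer(pretrained),&lt;br /&gt;
     trainable=False)&lt;br /&gt;
 &lt;br /&gt;
 token_ids = tf.placeholder(tf.int32, [None, None])  # batch x time&lt;br /&gt;
 inputs = tf.nn.embedding_lookup(embedding, token_ids)&lt;br /&gt;
&lt;br /&gt;
The 2017/05/31 variant (45.98 BLEU) corresponds to freezing only the encoder-side English table this way while everything else stays trainable.&lt;br /&gt;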
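&lt;br /&gt;
The joint loss of the 2017/05/23 experiment simply sums the two decoders' losses, so gradients reach both decoders. A minimal sketch under assumed shapes (all tensor names and the vocabulary size are illustrative):&lt;br /&gt;
&lt;br /&gt;
 # Sketch: joint loss for the double-decoder model.&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
 &lt;br /&gt;
 # per-step vocabulary logits and integer targets for each decoder&lt;br /&gt;
 logits_1 = tf.placeholder(tf.float32, [None, None, 6000])&lt;br /&gt;
 logits_2 = tf.placeholder(tf.float32, [None, None, 6000])&lt;br /&gt;
 targets_1 = tf.placeholder(tf.int32, [None, None])&lt;br /&gt;
 targets_2 = tf.placeholder(tf.int32, [None, None])&lt;br /&gt;
 &lt;br /&gt;
 def decoder_loss(logits, targets):&lt;br /&gt;
     ce = tf.nn.sparse_softmax_cross_entropy_with_logits(&lt;br /&gt;
         labels=targets, logits=logits)&lt;br /&gt;
     return tf.reduce_mean(ce)&lt;br /&gt;
 &lt;br /&gt;
 # final loss = 1st decoder's loss + 2nd decoder's loss&lt;br /&gt;
 joint_loss = decoder_loss(logits_1, targets_1) + decoder_loss(logits_2, targets_2)&lt;br /&gt;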
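&lt;br /&gt;
The double-attention experiments (2017/05/28 and 2017/05/30 rows) mix the two encoders' context vectors with fixed weights before feeding the decoder. A minimal sketch of the mixing step (the vector size is illustrative):&lt;br /&gt;
&lt;br /&gt;
 # Sketch: mixing two attention context vectors.&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
 &lt;br /&gt;
 attn_1 = tf.placeholder(tf.float32, [None, 500])  # Chinese-encoder context&lt;br /&gt;
 attn_2 = tf.placeholder(tf.float32, [None, 500])  # English-encoder context&lt;br /&gt;
 &lt;br /&gt;
 # the unweighted sum was the most stable setting in the log;&lt;br /&gt;
 # the 2/3 vs. 4/3 weightings were the other variants tried&lt;br /&gt;
 final_attn = attn_1 + attn_2&lt;br /&gt;
 weighted_attn = (2.0 / 3.0) * attn_1 + (4.0 / 3.0) * attn_2&lt;br /&gt;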
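&lt;br /&gt;
The ViVi_NMT upgrade from TensorFlow 0.x to 1.0 (2017/06/27 through 2017/06/30, plus the version comparisons in July) is largely mechanical API renaming. The renames below are the usual suspects in such a port; the actual diff is not recorded in this log, so treat the list as an assumption:&lt;br /&gt;
&lt;br /&gt;
 # Typical TensorFlow 0.x -&amp;gt; 1.0 renames in a port like ViVi_NMT's&lt;br /&gt;
 # (illustrative; not the recorded diff).&lt;br /&gt;
 # tf.nn.seq2seq.*               -&amp;gt; tf.contrib.legacy_seq2seq.*&lt;br /&gt;
 # tf.nn.rnn_cell.*              -&amp;gt; tf.contrib.rnn.*&lt;br /&gt;
 # tf.mul / tf.sub / tf.neg      -&amp;gt; tf.multiply / tf.subtract / tf.negative&lt;br /&gt;
 # tf.concat(dim, values)        -&amp;gt; tf.concat(values, axis)&lt;br /&gt;
 # tf.initialize_all_variables() -&amp;gt; tf.global_variables_initializer()&lt;br /&gt;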
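&lt;br /&gt;
The character-by-character split of the ancient-language text (2017/08/09 row, and item 4 of the 2017/08/16 summary) is a one-line preprocessing step. A sketch with hypothetical file names:&lt;br /&gt;
&lt;br /&gt;
 # Sketch: split ancient-language text into space-separated characters.&lt;br /&gt;
 with open('ancient.txt', encoding='utf-8') as src, \&lt;br /&gt;
      open('ancient.char.txt', 'w', encoding='utf-8') as out:&lt;br /&gt;
     for line in src:&lt;br /&gt;
         # e.g. '神大用则竭' becomes '神 大 用 则 竭'&lt;br /&gt;
         out.write(' '.join(line.strip()) + '\n')&lt;br /&gt;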
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T11:19:12Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku (优酷)&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot (图灵机器人)&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney &lt;br /&gt;
* '''Shiyao Li (李诗瑶)'''   :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person !! Start !! Leave !! Hours !! Status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, containing 22,000 and 22,017 sentence pairs respectively,&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to prevent overfitting we use the&lt;br /&gt;
                     model at 3000 updates, whose BLEU is 34.96, to generate the 2nd translator's&lt;br /&gt;
                      training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or different&lt;br /&gt;
                     data won't influence the 2nd translator's performance; if anything, using the same data may&lt;br /&gt;
                      be better, at least judging from these results. But I have to take into account the smaller&lt;br /&gt;
                      training set compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses a '''constant untrainable embedding''' imported from the 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                      performance due to information loss as the information flows through the network from&lt;br /&gt;
                      end to end. The decoder's smaller vocabulary size compared to the encoder's demonstrates&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a map that solves meaning shift using the 2nd translator,&lt;br /&gt;
                      but whether that map is learned or not is obscured by the smaller-vocabulary&lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between the LSTM model and the GRU model&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size; will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on the small data set but encounter decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the development set but badly on the test data; I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder model without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, meaning the final loss = 1st decoder's loss + 2nd&lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why the double-decoder model without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training process) and the beam-search mechanism (decoding process)&lt;br /&gt;
  propagates and amplifies the error toward the output end, which breaks the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable (see the sketch below).&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Prepared the large data set.&lt;br /&gt;
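*a minimal TF1-style sketch of the "constant embedding" setting (the variable name and the stand-in array are illustrative; the real table would be exported from the 1st translator's checkpoint):&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  import tensorflow as tf  # 1.x-style API, matching the log's TF version&lt;br /&gt;
  &lt;br /&gt;
  # stand-in for the embedding table taken from the 1st translator&lt;br /&gt;
  pretrained = np.random.randn(30000, 310).astype(np.float32)&lt;br /&gt;
  &lt;br /&gt;
  # trainable=False keeps the English-side embedding constant while every&lt;br /&gt;
  # other parameter keeps training (the 45.98 setting); switching it to&lt;br /&gt;
  # trainable=True gives the initialize-then-finetune variant (46.06).&lt;br /&gt;
  en_enc_emb = tf.Variable(pretrained, trainable=False, name="en_enc_emb")&lt;br /&gt;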
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Making only the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Initializing the English encoder's embedding and then finetuning it -- 46.06&lt;br /&gt;
* Sharing the attention mechanism and directly adding the two contexts -- 46.20&lt;br /&gt;
* Ran the double-attention model on the large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing via synonym substitution, but it didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to identify similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to identify similar words&lt;br /&gt;
* This didn't work because semantics are not captured (see the scoring sketch below)&lt;br /&gt;
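*a minimal sketch of the substitution selection described above: rank candidate synonyms by language-model score of the edited sentence (the toy count-based scorer stands in for the trained GRU LM and illustrates why fluency alone can pick a semantically wrong word):&lt;br /&gt;
  from collections import Counter&lt;br /&gt;
  &lt;br /&gt;
  corpus = "the cat sat on the mat . the dog sat on the rug .".split()&lt;br /&gt;
  bigrams = Counter(zip(corpus, corpus[1:]))&lt;br /&gt;
  &lt;br /&gt;
  def lm_score(tokens):&lt;br /&gt;
      # higher = more fluent under the toy model (GRU log-prob in the real setup)&lt;br /&gt;
      return sum(bigrams[(a, b)] for a, b in zip(tokens, tokens[1:]))&lt;br /&gt;
  &lt;br /&gt;
  sentence = "the cat sat on the mat .".split()&lt;br /&gt;
  candidates = ["dog", "rug", "mat"]   # proposed substitutes for "cat"&lt;br /&gt;
  best = max(candidates, key=lambda w: lm_score([w if t == "cat" else t for t in sentence]))&lt;br /&gt;
  print(best)  # "dog" scores as fluent as "cat" did, yet changes the meaning&lt;br /&gt;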
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by the encoder, and the decoder generates sequences from this vector (sketched below)&lt;br /&gt;
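*a minimal sketch (toy, illustrative) of that idea: the encoder compresses the source into one fixed-length vector, and the decoder generates tokens conditioned only on that vector:&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  &lt;br /&gt;
  rng = np.random.default_rng(0)&lt;br /&gt;
  V, H = 10, 16&lt;br /&gt;
  E_in, E_out = rng.normal(size=(V, H)), rng.normal(size=(H, V))&lt;br /&gt;
  &lt;br /&gt;
  def encode(src):&lt;br /&gt;
      h = np.zeros(H)&lt;br /&gt;
      for tok in src:                 # RNN-style recurrence&lt;br /&gt;
          h = np.tanh(h + E_in[tok])&lt;br /&gt;
      return h                        # the fixed-length "semantics" vector&lt;br /&gt;
  &lt;br /&gt;
  def decode(h, max_len=5, bos=0):&lt;br /&gt;
      out, y = [], bos&lt;br /&gt;
      for _ in range(max_len):&lt;br /&gt;
          h = np.tanh(h + E_in[y])    # carries the encoder vector plus own output&lt;br /&gt;
          y = int((h @ E_out).argmax())&lt;br /&gt;
          out.append(y)&lt;br /&gt;
      return out&lt;br /&gt;
  &lt;br /&gt;
  print(decode(encode([1, 2, 3])))&lt;br /&gt;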
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves time, has lower complexity, and gets a better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on the small data set (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves training time and has lower complexity; the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on the small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper allocation of resources by the tensorflow0.1 version leads to exhaustion of memory&lt;br /&gt;
* after retrying many times, version 0.1 eventually worked as well&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared with the version 0.1 code (see the estimate below)&lt;br /&gt;
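* for scale (illustrative; the log does not record the total step count): over, say, 200,000 training steps, 0.06 s/step is 12,000 s, i.e. roughly 3.3 hours saved per run&lt;br /&gt;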
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation quality is poor&lt;br /&gt;
* reason: no data preprocessing was done (see the sketch below)&lt;br /&gt;
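*a minimal sketch of the kind of preprocessing that was missing (assumptions: whitespace tokenization, lowercasing, and length/ratio filtering; real WMT pipelines also apply proper tokenizers and subword splitting):&lt;br /&gt;
  def clean_pairs(pairs, max_len=50, max_ratio=1.5):&lt;br /&gt;
      # whitespace-tokenize, lowercase, drop empty/overlong/length-mismatched pairs&lt;br /&gt;
      for src, tgt in pairs:&lt;br /&gt;
          s, t = src.lower().split(), tgt.lower().split()&lt;br /&gt;
          if not s or not t or len(s) &amp;gt; max_len or len(t) &amp;gt; max_len:&lt;br /&gt;
              continue&lt;br /&gt;
          if max(len(s), len(t)) / min(len(s), len(t)) &amp;gt; max_ratio:&lt;br /&gt;
              continue&lt;br /&gt;
          yield s, t&lt;br /&gt;
  &lt;br /&gt;
  print(list(clean_pairs([("Bonjour le monde .", "Hello world .")])))&lt;br /&gt;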
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* (reason: the test set and training set of the small data set are similar in content and style)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (out-of-memory) error occurred when version 0.1 was trained on the large data set, but version 1.0 worked&lt;br /&gt;
    reason: improper allocation of resources by the tensorflow0.1 framework leads to exhaustion of memory&lt;br /&gt;
* I tried 4 times (just entering the same command), and version 0.1 eventually worked&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learned Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train a statistical machine translation model and test it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* test whether Moses works normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data, train the model, and test the model&lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run the model on the data whose ancient text was split character by character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test them&lt;br /&gt;
* dataset: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers on CNNs&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read the THUMT manual and learned how to use it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source: 在秦者名错，与张仪争论，於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target: 在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城。&lt;br /&gt;
*source: 神大用则竭，形大劳则敝，形神离则死。&lt;br /&gt;
*target: 精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。&lt;br /&gt;
*trans: 精神过度就可衰竭，身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source: 今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target: 现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！&lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. Using Zizhitongjian data only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. Using Zizhitongjian data only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient-language text split character by character, we get BLEU 11.11 at most (a character-level scoring sketch follows below).&lt;br /&gt;
*The main limiting factor now is the data, both the number of sentence pairs and their quality, since the modern-language text includes context information.&lt;br /&gt;
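*a character-level BLEU sketch matching item 4 above (nltk's corpus_bleu as a stand-in for the project's scoring script; smoothing is added only because this toy sample is tiny):&lt;br /&gt;
  from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction&lt;br /&gt;
  &lt;br /&gt;
  def chars(s):&lt;br /&gt;
      return [c for c in s if not c.isspace()]   # split Chinese character by character&lt;br /&gt;
  &lt;br /&gt;
  refs = [[chars("精神过度使用就会衰竭")]]   # one list of references per hypothesis&lt;br /&gt;
  hyps = [chars("精神过度就可衰竭")]&lt;br /&gt;
  bleu = corpus_bleu(refs, hyps, smoothing_function=SmoothingFunction().method1)&lt;br /&gt;
  print(round(100 * bleu, 2))&lt;br /&gt;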
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* searched for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run the model on data including Shiji and Zizhitongjian.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data, train the model, and test the model&lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models using single and multiple references&lt;br /&gt;
* organize all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T11:14:49Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney&lt;br /&gt;
* '''Shiyao Li (李诗瑶)'''   :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person !! Start !! Leave !! Hours !! Status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, with 22,000 and 22,017 sentences respectively&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4,750 updates; to avoid overfitting, we use the model&lt;br /&gt;
                     at 3,000 updates, whose BLEU is 34.96, to generate the 2nd translator's&lt;br /&gt;
                     training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that whether the 2nd translator uses the same training data as the 1st&lt;br /&gt;
                     translator or a different set does not much influence its performance; if anything,&lt;br /&gt;
                     using the same data may be better, judging from these results. But I also have to&lt;br /&gt;
                     consider the smaller training set compared with yesterday's model.&lt;br /&gt;
*code the 2nd translator with constant embedding (the cascade's data flow is sketched below)&lt;br /&gt;
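*a minimal sketch (hypothetical helper names, not the project's tf_translate API) of how the cascade derives the 2nd translator's training data: the trained 1st translator decodes the Chinese side, and its machine-translated English is paired with the gold English:&lt;br /&gt;
  class DummyTranslator:&lt;br /&gt;
      """Stand-in for the trained 1st translator's beam-search decoder."""&lt;br /&gt;
      def beam_decode(self, zh):&lt;br /&gt;
          return "machine translated : " + zh    # placeholder output&lt;br /&gt;
  &lt;br /&gt;
  def build_second_stage_corpus(zh_sents, en_refs, translator_1):&lt;br /&gt;
      # each training pair: (1st translator's English output, gold English)&lt;br /&gt;
      return [(translator_1.beam_decode(zh), en) for zh, en in zip(zh_sents, en_refs)]&lt;br /&gt;
  &lt;br /&gt;
  print(build_second_stage_corpus(["你好 世界"], ["hello world"], DummyTranslator()))&lt;br /&gt;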
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model impairs the final performance&lt;br /&gt;
                       due to information loss as the information flows through the network from&lt;br /&gt;
                       end to end. The decoder's smaller vocabulary size compared to the encoder's&lt;br /&gt;
                       demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment was to look for a mapping that fixes meaning shift using the 2nd&lt;br /&gt;
                       translator, but whether that mapping is learned is obscured by the smaller&lt;br /&gt;
                       vocabulary phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned the LSTM model and the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM and GRU models&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size and will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the development set but badly on the test data; I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder without the joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with the joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, which means the final loss = 1st decoder's loss + 2nd&lt;br /&gt;
  decoder's loss (sketched below)&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why the double-decoder without the joint loss generalizes so badly is the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training) and the beam-search mechanism (decoding): the mismatch&lt;br /&gt;
  propagates and amplifies errors toward the output end, which breaks the model at decoding time.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train the double-decoder model without the joint loss but with beam search on the 1st decoder.&lt;br /&gt;
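*a minimal sketch of the joint loss from experiment 2 above (framework-agnostic; the numbers are illustrative per-batch cross-entropies, and in the real model the gradients of both terms flow back into the shared encoder):&lt;br /&gt;
  def joint_loss(loss_decoder_1, loss_decoder_2):&lt;br /&gt;
      # final loss = 1st decoder's loss + 2nd decoder's loss&lt;br /&gt;
      return loss_decoder_1 + loss_decoder_2&lt;br /&gt;
  &lt;br /&gt;
  print(joint_loss(2.31, 2.47))  # about 4.78&lt;br /&gt;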
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Prepared the large data set.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Making only the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Initializing the English encoder's embedding and then finetuning it -- 46.06&lt;br /&gt;
* Sharing the attention mechanism and directly adding the two contexts -- 46.20&lt;br /&gt;
* Ran the double-attention model on the large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing via synonym substitution, but it didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to identify similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to identify similar words&lt;br /&gt;
* This didn't work because semantics are not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by the encoder, and the decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves time, has lower complexity, and gets a better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on the small data set (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves training time and has lower complexity; the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on the small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper allocation of resources by the tensorflow0.1 version leads to exhaustion of memory&lt;br /&gt;
* after retrying many times, version 0.1 eventually worked as well&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation quality is poor&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* (reason: the test set and training set of the small data set are similar in content and style)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (out-of-memory) error occurred when version 0.1 was trained on the large data set, but version 1.0 worked&lt;br /&gt;
    reason: improper allocation of resources by the tensorflow0.1 framework leads to exhaustion of memory&lt;br /&gt;
* I tried 4 times (just entering the same command), and version 0.1 eventually worked&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-de&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset: WMT2014 en-fr&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked up the performance (BLEU values) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learned about Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained a statistical machine translation model and tested it&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* checked that Moses works correctly&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* wrote automation scripts to process the data, train the model, and test it (a minimal sketch follows)&lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
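* a minimal sketch of such a script, assuming a standard mosesdecoder checkout (all paths and file names here are illustrative, not the actual script):&lt;br /&gt;
  # hypothetical wrapper around the Moses pipeline (zh-en small)&lt;br /&gt;
  import subprocess&lt;br /&gt;
  MOSES = '/path/to/mosesdecoder'  # assumed install location&lt;br /&gt;
  def run(cmd, stdin=None, stdout=None):&lt;br /&gt;
      # run one pipeline stage and fail loudly on error&lt;br /&gt;
      subprocess.check_call(cmd, stdin=stdin, stdout=stdout)&lt;br /&gt;
  # 1. tokenize both sides of the training data&lt;br /&gt;
  for lang in ['zh', 'en']:&lt;br /&gt;
      run(['perl', MOSES + '/scripts/tokenizer/tokenizer.perl', '-l', lang],&lt;br /&gt;
          stdin=open('train.' + lang), stdout=open('train.tok.' + lang, 'w'))&lt;br /&gt;
  # 2. train the phrase-based model (the language model is assumed built beforehand)&lt;br /&gt;
  run(['perl', MOSES + '/scripts/training/train-model.perl', '-root-dir', 'work',&lt;br /&gt;
       '-corpus', 'train.tok', '-f', 'zh', '-e', 'en', '-lm', '0:3:lm.en.blm:8'])&lt;br /&gt;
  # 3. decode the test set, then score it with multi-bleu.perl&lt;br /&gt;
  run([MOSES + '/bin/moses', '-f', 'work/model/moses.ini'],&lt;br /&gt;
      stdin=open('test.tok.zh'), stdout=open('test.out', 'w'))&lt;br /&gt;
  run(['perl', MOSES + '/scripts/generic/multi-bleu.perl', 'test.tok.en'],&lt;br /&gt;
      stdin=open('test.out'))&lt;br /&gt;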
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on data whose ancient-language side was split into single characters&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained statistical machine translation models and tested them&lt;br /&gt;
* datasets: zh-en big, WMT2014 en-de, WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* searched for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model on data including Shiji and Zizhitongjian&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient language text split character by character, we get BLEU 11.11 at most (see the sketch below).&lt;br /&gt;
*The main limiting factor now is the data: both the number of sentence pairs and their quality (the modern language text includes context information that the ancient text lacks).&lt;br /&gt;
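* a minimal sketch of the character-level splitting in item 4, assuming plain UTF-8 text files (file names are illustrative):&lt;br /&gt;
  # split the ancient-language side into single characters;&lt;br /&gt;
  # the modern side stays word-segmented as before&lt;br /&gt;
  def split_chars(line):&lt;br /&gt;
      # e.g. '在秦者名错' becomes '在 秦 者 名 错'&lt;br /&gt;
      return ' '.join(ch for ch in line.strip() if not ch.isspace())&lt;br /&gt;
  with open('ancient.txt', encoding='utf-8') as src:&lt;br /&gt;
      with open('ancient.char.txt', 'w', encoding='utf-8') as out:&lt;br /&gt;
          for line in src:&lt;br /&gt;
              out.write(split_chars(line) + '\n')&lt;br /&gt;
* character splitting shrinks the source vocabulary and eases data sparsity, which plausibly explains the gain from BLEU 9 to 11.11.&lt;br /&gt;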
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
* checked that THUMT works correctly&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* wrote automation scripts to process the data, train the model, and test it&lt;br /&gt;
* trained translation models and tested them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested translation models using a single reference and multiple references (see the scoring sketch below)&lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
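* a minimal sketch of single- versus multi-reference scoring with NLTK's corpus_bleu (an assumed stand-in for the evaluation script actually used; file names are illustrative):&lt;br /&gt;
  from nltk.translate.bleu_score import corpus_bleu&lt;br /&gt;
  hyps = [line.split() for line in open('test.out', encoding='utf-8')]&lt;br /&gt;
  # one reference per sentence ...&lt;br /&gt;
  refs_1 = [[line.split()] for line in open('ref0.txt', encoding='utf-8')]&lt;br /&gt;
  # ... versus four references per sentence&lt;br /&gt;
  ref_files = [open('ref%d.txt' % i, encoding='utf-8').readlines() for i in range(4)]&lt;br /&gt;
  refs_4 = [[r.split() for r in group] for group in zip(*ref_files)]&lt;br /&gt;
  print('single-reference BLEU:', corpus_bleu(refs_1, hyps))&lt;br /&gt;
  print('multi-reference BLEU:', corpus_bleu(refs_4, hyps))&lt;br /&gt;
* multi-reference BLEU is usually higher, since an n-gram only needs to match any one of the references.&lt;br /&gt;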
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T06:47:46Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*done some work about code refactoring for poem system &lt;br /&gt;
|| &lt;br /&gt;
*plan to complete code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
* trained and tested translation models (toolkit: THUMT)&lt;br /&gt;
* compared them with our system&lt;br /&gt;
||&lt;br /&gt;
* prepare to release the baseline system (TensorFlow 1.0 version)&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* process data and run model;&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
*1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
*The main limiting factor now is the data: both the number of sentence pairs and their quality (the modern language text includes context information that the ancient text lacks).&lt;br /&gt;
||  &lt;br /&gt;
*plan to read the source code of the seq2seq model and learn TensorFlow&lt;br /&gt;
*plan to read the paper Automatic Long Sentence Segmentation for NMT&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T06:41:54Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*done some work about code refactoring for poem system &lt;br /&gt;
|| &lt;br /&gt;
*plan to complete code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
* trained and tested translation models (toolkit: THUMT)&lt;br /&gt;
* compared them with our system&lt;br /&gt;
||&lt;br /&gt;
* prepare to release the baseline system (TensorFlow 1.0 version)&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* process data and run model;&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
*1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
*The main limiting factor now is the data: both the number of sentence pairs and their quality (the modern language text includes context information that the ancient text lacks).&lt;br /&gt;
||  &lt;br /&gt;
*plan to read the source code of the seq2seq model&lt;br /&gt;
*plan to read the paper Automatic Long Sentence Segmentation for NMT&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T06:37:32Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*done some work about code refactoring for poem system &lt;br /&gt;
|| &lt;br /&gt;
*plan to complete code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* organized all the experimental results (our baseline system, Moses, THUMT)&lt;br /&gt;
* trained and tested translation models (toolkit: THUMT)&lt;br /&gt;
* compared them with our system&lt;br /&gt;
||&lt;br /&gt;
* prepare to release the baseline system (TensorFlow 1.0 version)&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* process data and run model;&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
*1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
*The main limiting factor now is the data: both the number of sentence pairs and their quality (the modern language text includes context information that the ancient text lacks).&lt;br /&gt;
||  &lt;br /&gt;
*plan to read the source code of the seq2seq model&lt;br /&gt;
*plan to read the paper Automatic Long Sentence Segmentation for NMT&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T04:25:43Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date!!People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;7&amp;quot;|2017.8.21&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|GuoJiayu&lt;br /&gt;
||  &lt;br /&gt;
* process data and run model;&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* read source code of the seq2seq model&lt;br /&gt;
*1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
*The main limiting factor now is the data: both the number of sentence pairs and their quality (the modern language text includes context information that the ancient text lacks).&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T03:30:13Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date!!People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;7&amp;quot;|2017.8.21&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|GuoJiayu&lt;br /&gt;
||  &lt;br /&gt;
* process data and run model;&lt;br /&gt;
||  &lt;br /&gt;
* read source code of seq2seq model;&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T03:28:22Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;7&amp;quot;|2017.8.21&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|guojiayu&lt;br /&gt;
||  &lt;br /&gt;
* &lt;br /&gt;
||  &lt;br /&gt;
* read seq2seq_model source code;&lt;br /&gt;
|-&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T03:27:48Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date!!People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;7&amp;quot;|2017.8.21&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|guojiayu&lt;br /&gt;
||  &lt;br /&gt;
* &lt;br /&gt;
||  &lt;br /&gt;
* read seq2seq_model source code;&lt;br /&gt;
|-&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T02:19:18Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：Undid revision 28325 by Guojiayu (talk)&lt;/p&gt;</summary>
&lt;hr /&gt;
&lt;div&gt;&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T02:16:54Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;*checkpoint-100000 translation model BLEU: 11.11&lt;br /&gt;
*source1:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target1:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans1: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source2:神大用则竭，形大劳则敝，形神离则死 。&lt;br /&gt;
*target2:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。&lt;br /&gt;
*trans2: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source3:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target3:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！&lt;br /&gt;
*trans3: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
The main limiting factor now is the data: both the number of sentence pairs and their quality (the modern language text includes context information that the ancient text lacks).&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T02:15:55Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：Created page with “checkpoint-100000 translation model BLEU: 11.11 source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。 target:在秦国的...”&lt;/p&gt;</summary>
&lt;hr /&gt;
&lt;div&gt;checkpoint-100000 translation model BLEU: 11.11&lt;br /&gt;
source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
source:神大用则竭，形大劳则敝，形神离则死 。&lt;br /&gt;
target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。&lt;br /&gt;
trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！&lt;br /&gt;
trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
The main limiting factor now is the data: both the number of sentence pairs and their quality (the modern language text includes context information that the ancient text lacks).&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-20T13:22:14Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney&lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, with 22,000 and 22,017 sentences respectively&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to avoid overfitting we use the&lt;br /&gt;
                     model at 3000 updates, whose BLEU is 34.96, to generate the 2nd&lt;br /&gt;
                     translator's training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or a&lt;br /&gt;
                     different set won't influence the 2nd translator's performance; if anything, using&lt;br /&gt;
                     the same data may be better, judging from these results. But I must also consider&lt;br /&gt;
                     the smaller training data size compared to yesterday's model.&lt;br /&gt;
* coded the 2nd translator with constant embeddings&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embeddings''' imported from the 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will impair the final&lt;br /&gt;
                      performance, due to information loss as information flows through the network&lt;br /&gt;
                      from end to end. The decoder's smaller vocabulary size compared to the&lt;br /&gt;
                      encoder's demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment was to look for a map that solves meaning shift using the&lt;br /&gt;
                      2nd translator, but whether that map is learned is obscured by the smaller&lt;br /&gt;
                      vocabulary size phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size; will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debugged the double-decoder model&lt;br /&gt;
* the model performs well on the dev set but badly on the test data; I want to figure out why.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (previously 500)&lt;br /&gt;
  emb_size = 510 (previously 310)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (previously 0.001)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, which means final loss = 1st decoder's loss + 2nd&lt;br /&gt;
  decoder's loss (see the sketch at the end of this entry)&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than the 2nd decoder's. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason the double-decoder without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training process) and the beam search mechanism (decoding process)&lt;br /&gt;
  propagates and amplifies errors toward the output end, which breaks the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train the double-decoder model without joint loss but with beam search on the 1st decoder.&lt;br /&gt;
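* a minimal TensorFlow 1.0-style sketch of the joint loss above (tensor shapes and the vocabulary size are assumptions; the real decoders are built elsewhere):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  V = 6000  # assumed target vocabulary size&lt;br /&gt;
  targets = tf.placeholder(tf.int32, [None, None])        # [batch, time]&lt;br /&gt;
  logits_1 = tf.placeholder(tf.float32, [None, None, V])  # 1st decoder's outputs&lt;br /&gt;
  logits_2 = tf.placeholder(tf.float32, [None, None, V])  # 2nd decoder's outputs&lt;br /&gt;
  def decoder_loss(logits):&lt;br /&gt;
      # average per-token cross-entropy for one decoder&lt;br /&gt;
      ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits)&lt;br /&gt;
      return tf.reduce_mean(ce)&lt;br /&gt;
  # final loss = 1st decoder's loss + 2nd decoder's loss&lt;br /&gt;
  joint_loss = decoder_loss(logits_1) + decoder_loss(logits_2)&lt;br /&gt;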
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention mechanisms&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2: best result of our model '''43.50'''&lt;br /&gt;
    final_attn = 2/3 attn_1 + 4/3 attn_2: best result of our model '''41.22'''&lt;br /&gt;
    final_attn = 4/3 attn_1 + 2/3 attn_2: best result of our model '''43.58'''&lt;br /&gt;
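* a minimal numpy sketch of the final_attn combination above (plain dot-product attention stands in for the actual mechanism; sizes are illustrative):&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  def attend(query, keys, values):&lt;br /&gt;
      # softmax over source positions, then a weighted sum of values&lt;br /&gt;
      scores = keys.dot(query)&lt;br /&gt;
      weights = np.exp(scores - scores.max())&lt;br /&gt;
      weights = weights / weights.sum()&lt;br /&gt;
      return values.T.dot(weights)&lt;br /&gt;
  rng = np.random.RandomState(0)&lt;br /&gt;
  d = 500  # assumed hidden size&lt;br /&gt;
  q = rng.randn(d)  # decoder state (the query)&lt;br /&gt;
  keys_zh = rng.randn(7, d); vals_zh = rng.randn(7, d)  # Chinese encoder states&lt;br /&gt;
  keys_en = rng.randn(9, d); vals_en = rng.randn(9, d)  # machine-translated English states&lt;br /&gt;
  attn_1 = attend(q, keys_zh, vals_zh)&lt;br /&gt;
  attn_2 = attend(q, keys_en, vals_en)&lt;br /&gt;
  final_attn = attn_1 + attn_2  # or reweighted, e.g. 4/3 * attn_1 + 2/3 * attn_2&lt;br /&gt;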
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention mechanisms&lt;br /&gt;
  '''final_attn = 2/3 attn_1 + 4/3 attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3 attn_1 + 4/3 attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3 attn_1 + 2/3 attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Prepared the big data set.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and directly add the two attentions -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took a new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing via synonym substitution, but it didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
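* for reference, one GRU step in numpy (a sketch of the recurrence such a language model uses, not the project's code):&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  def sigmoid(x):&lt;br /&gt;
      return 1.0 / (1.0 + np.exp(-x))&lt;br /&gt;
  def gru_step(x, h, Wz, Uz, Wr, Ur, Wh, Uh):&lt;br /&gt;
      z = sigmoid(Wz.dot(x) + Uz.dot(h))          # update gate&lt;br /&gt;
      r = sigmoid(Wr.dot(x) + Ur.dot(h))          # reset gate&lt;br /&gt;
      h_new = np.tanh(Wh.dot(x) + Uh.dot(r * h))  # candidate state&lt;br /&gt;
      return (1 - z) * h + z * h_new              # new hidden state&lt;br /&gt;
* a left-to-right LM like this scores local fluency only, which matches the observation that semantics is not captured.&lt;br /&gt;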
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* An encoder stores the semantics in a fixed-length vector, and a decoder generates sequences from this vector (a sketch follows)&lt;br /&gt;
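* a minimal sketch of that encoder idea (a plain RNN stands in for the actual model; shapes are illustrative):&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  def encode(embeddings, W, U):&lt;br /&gt;
      # fold the source token embeddings into one fixed-length vector&lt;br /&gt;
      h = np.zeros(U.shape[0])&lt;br /&gt;
      for x in embeddings:&lt;br /&gt;
          h = np.tanh(W.dot(x) + U.dot(h))&lt;br /&gt;
      return h  # the decoder conditions every output step on this vector&lt;br /&gt;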
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time and reaches lower perplexity and better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran two versions of the code on the small data set (Chinese-English)&lt;br /&gt;
* tested the checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found that version 1.0 of the code saves more training time and reaches lower perplexity, and that the two versions have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good even when the model is overfitting&lt;br /&gt;
* reason: on the small data set, the test set and training set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran two versions of the code on the big data set (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when the version 0.1 code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation in the tensorflow0.1 version exhausts memory&lt;br /&gt;
* after trying many times, version 0.1 eventually worked (a memory-config sketch follows)&lt;br /&gt;
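* one knob worth checking in the TensorFlow 1.0 version is GPU memory allocation (a sketch; whether it helps the 0.1 code is untested):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  config = tf.ConfigProto()&lt;br /&gt;
  config.gpu_options.allow_growth = True  # allocate GPU memory on demand&lt;br /&gt;
  config.gpu_options.per_process_gpu_memory_fraction = 0.9  # optional hard cap&lt;br /&gt;
  sess = tf.Session(config=config)&lt;br /&gt;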
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saves 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the WMT2014 data set&lt;br /&gt;
* ran the code on the English-French data set and found the translation quality poor&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient language text into single words&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on data whose ancient-language side was split into single characters&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* searched for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model on data including Shiji and Zizhitongjian&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
*4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient language text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
*The main limiting factor now is the data: both the number of sentence pairs and their quality (the modern language text includes context information that the ancient text lacks).&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-20T13:20:52Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* NLP Schedule */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney&lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use the same training data, &lt;br /&gt;
  the 2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use different training data, containing 22,000 and 22,017 pairs respectively, &lt;br /&gt;
  the 2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to avoid overfitting we use the model&lt;br /&gt;
                     at 3000 updates, whose BLEU is 34.96, to generate the 2nd translator's&lt;br /&gt;
                     training data; see the corpus-building sketch below)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or a different&lt;br /&gt;
                     set won't influence the 2nd translator's performance; if anything, using the&lt;br /&gt;
                     same data may be better, at least from these results. But I have to consider&lt;br /&gt;
                     the smaller training-data size compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
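*A minimal sketch of how the 2nd translator's training corpus can be built (hypothetical stub names; translate_v1 stands for the trained 1st translator):&lt;br /&gt;
 def translate_v1(src_sentence):&lt;br /&gt;
     # stub standing in for the trained 1st translator (e.g. the 3000-update model)&lt;br /&gt;
     return src_sentence  # placeholder draft translation&lt;br /&gt;
&lt;br /&gt;
 def build_second_stage_corpus(src_sentences, references):&lt;br /&gt;
     # pair each machine-translated draft with the original reference;&lt;br /&gt;
     # these (draft, reference) pairs train the 2nd translator&lt;br /&gt;
     drafts = [translate_v1(s) for s in src_sentences]&lt;br /&gt;
     return list(zip(drafts, references))&lt;br /&gt;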
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use the same training data, &lt;br /&gt;
  the 2nd translator uses '''constant untrainable embedding''' imported from the 1st translator's decoder (see the sketch below)&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will impair the final performance&lt;br /&gt;
                      due to information loss as the information flows through the network from&lt;br /&gt;
                      end to end. The decoder's smaller vocabulary size compared to the encoder's&lt;br /&gt;
                      demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a map that solves meaning shift using the 2nd&lt;br /&gt;
                      translator, but whether the map is learned or not is obscured by the smaller&lt;br /&gt;
                      vocabulary size.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
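*A minimal sketch of this constant-embedding setup, assuming a TF 1.x API and a hypothetical file holding the 1st translator's decoder embedding:&lt;br /&gt;
 import numpy as np&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
&lt;br /&gt;
 # hypothetical dump of the 1st translator's decoder embedding matrix&lt;br /&gt;
 pretrained = np.load('first_decoder_embedding.npy').astype(np.float32)&lt;br /&gt;
 # trainable=False keeps the imported embedding constant during training&lt;br /&gt;
 embedding = tf.get_variable('embedding', initializer=pretrained, trainable=False)&lt;br /&gt;
 token_ids = tf.placeholder(tf.int32, shape=[None, None])  # [batch, time]&lt;br /&gt;
 inputs = tf.nn.embedding_lookup(embedding, token_ids)&lt;br /&gt;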
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM model and the GRU model&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size and will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the development set but badly on the test data; I want to figure out why.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (previously 500)&lt;br /&gt;
  emb_size = 510 (previously 310)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder model without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (previously 0.001)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, which means the final loss = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss (see the joint-loss sketch below)&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why the double-decoder without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training process) and the beam-search mechanism (decoding process)&lt;br /&gt;
  propagates and amplifies the error toward the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
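*A minimal sketch of this joint loss, assuming a TF 1.x API (toy trainable logits stand in for the two decoders' outputs):&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
&lt;br /&gt;
 labels = tf.constant([1, 3])&lt;br /&gt;
 # toy trainable logits standing in for the two decoders' outputs&lt;br /&gt;
 logits_dec1 = tf.get_variable('logits_dec1', shape=[2, 5])&lt;br /&gt;
 logits_dec2 = tf.get_variable('logits_dec2', shape=[2, 5])&lt;br /&gt;
 loss_dec1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(&lt;br /&gt;
     labels=labels, logits=logits_dec1))&lt;br /&gt;
 loss_dec2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(&lt;br /&gt;
     labels=labels, logits=logits_dec2))&lt;br /&gt;
 # joint objective: final loss = 1st decoder's loss + 2nd decoder's loss&lt;br /&gt;
 joint_loss = loss_dec1 + loss_dec2&lt;br /&gt;
 train_op = tf.train.AdamOptimizer(0.001).minimize(joint_loss)&lt;br /&gt;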
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment (see the attention-combination sketch below): &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
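*A minimal numpy sketch of the attention combination (attn_zh and attn_en are hypothetical stand-ins for the two encoders' context vectors):&lt;br /&gt;
 import numpy as np&lt;br /&gt;
&lt;br /&gt;
 def combine_attention(attn_1, attn_2, w1=1.0, w2=1.0):&lt;br /&gt;
     # final_attn = w1*attn_1 + w2*attn_2; w1 = w2 = 1 gives attn_1 + attn_2&lt;br /&gt;
     return w1 * attn_1 + w2 * attn_2&lt;br /&gt;
&lt;br /&gt;
 attn_zh = np.random.randn(500)  # context from the Chinese encoder&lt;br /&gt;
 attn_en = np.random.randn(500)  # context from the draft-English encoder&lt;br /&gt;
 final_attn = combine_attention(attn_zh, attn_en, w1=4.0/3, w2=2.0/3)&lt;br /&gt;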
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing such as synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words (see the sketch below)&lt;br /&gt;
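*A minimal sketch of such a GRU language model, assuming a TF 1.x API and hypothetical sizes:&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
&lt;br /&gt;
 vocab_size, emb_size, hidden_size = 6000, 310, 500&lt;br /&gt;
 tokens = tf.placeholder(tf.int32, [None, None])  # [batch, time]&lt;br /&gt;
 embedding = tf.get_variable('emb', [vocab_size, emb_size])&lt;br /&gt;
 inputs = tf.nn.embedding_lookup(embedding, tokens[:, :-1])&lt;br /&gt;
 cell = tf.nn.rnn_cell.GRUCell(hidden_size)&lt;br /&gt;
 outputs, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)&lt;br /&gt;
 logits = tf.layers.dense(outputs, vocab_size)  # next-token scores&lt;br /&gt;
 loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(&lt;br /&gt;
     labels=tokens[:, 1:], logits=logits))&lt;br /&gt;
 train_op = tf.train.AdamOptimizer(0.001).minimize(loss)&lt;br /&gt;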
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because semantics are not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector (see the sketch below)&lt;br /&gt;
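*A minimal sketch of this idea, assuming a TF 1.x API (inputs are already embedded; sizes are hypothetical):&lt;br /&gt;
 import tensorflow as tf&lt;br /&gt;
&lt;br /&gt;
 hidden_size = 500&lt;br /&gt;
 src = tf.placeholder(tf.float32, [None, None, 310])  # embedded source&lt;br /&gt;
 tgt = tf.placeholder(tf.float32, [None, None, 310])  # embedded target inputs&lt;br /&gt;
 with tf.variable_scope('encoder'):&lt;br /&gt;
     enc_cell = tf.nn.rnn_cell.GRUCell(hidden_size)&lt;br /&gt;
     # the encoder's final state is the fixed-length sentence vector&lt;br /&gt;
     _, sentence_vec = tf.nn.dynamic_rnn(enc_cell, src, dtype=tf.float32)&lt;br /&gt;
 with tf.variable_scope('decoder'):&lt;br /&gt;
     dec_cell = tf.nn.rnn_cell.GRUCell(hidden_size)&lt;br /&gt;
     # the decoder generates conditioned only on that vector&lt;br /&gt;
     dec_out, _ = tf.nn.dynamic_rnn(dec_cell, tgt, initial_state=sentence_vec)&lt;br /&gt;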
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my PC and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has less complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on the small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained using the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory&lt;br /&gt;
* after trying many times, version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared to the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single characters (see the sketch below)&lt;br /&gt;
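*A minimal sketch of this character splitting (file names are hypothetical):&lt;br /&gt;
 import io&lt;br /&gt;
&lt;br /&gt;
 def split_chars(line):&lt;br /&gt;
     # insert a space between every character of the ancient-language text&lt;br /&gt;
     return u' '.join(line.strip())&lt;br /&gt;
&lt;br /&gt;
 with io.open('ancient.txt', encoding='utf-8') as src:&lt;br /&gt;
     with io.open('ancient.char.txt', 'w', encoding='utf-8') as out:&lt;br /&gt;
         for line in src:&lt;br /&gt;
             out.write(split_chars(line) + u'\n')&lt;br /&gt;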
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/01&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/02&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/03&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/04&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/07&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/08&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/09&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with data in which the ancient-language content was split into single characters.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* search for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. With data from Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
*2. With data from Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
*3. With data from Shiji and Zizhitongjian (430,000 pairs), we get BLEU of about 9.&lt;br /&gt;
*4. With data from Shiji and Zizhitongjian (430,000 pairs), and the ancient-language text split character by character, we get BLEU 11.11 at most (scored as in the sketch below).&lt;br /&gt;
*The main limiting factor now is the data, both the number of sentence pairs and their quality, since the modern-language text includes context information beyond the aligned sentence.&lt;br /&gt;
&lt;br /&gt;
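*A minimal character-level scoring sketch with NLTK's corpus_bleu (assuming NLTK is available; smoothing avoids zero n-gram counts on short sentences):&lt;br /&gt;
 from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction&lt;br /&gt;
&lt;br /&gt;
 # tokens are single characters, matching the character-split setup above&lt;br /&gt;
 refs = [[list(u'精神过度使用就会衰竭')]]  # one reference list per sentence&lt;br /&gt;
 hyps = [list(u'精神过度就可衰竭')]        # model output&lt;br /&gt;
 smooth = SmoothingFunction().method1&lt;br /&gt;
 print(corpus_bleu(refs, hyps, smoothing_function=smooth))&lt;br /&gt;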
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-13T04:20:09Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at the University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at the University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at the University of Sydney&lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use the same training data, &lt;br /&gt;
  the 2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use different training data, containing 22,000 and 22,017 pairs respectively, &lt;br /&gt;
  the 2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to avoid overfitting we use the model&lt;br /&gt;
                     at 3000 updates, whose BLEU is 34.96, to generate the 2nd translator's&lt;br /&gt;
                     training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or a different&lt;br /&gt;
                     set won't influence the 2nd translator's performance; if anything, using the&lt;br /&gt;
                     same data may be better, at least from these results. But I have to consider&lt;br /&gt;
                     the smaller training-data size compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  the 1st and 2nd translators use the same training data, &lt;br /&gt;
  the 2nd translator uses '''constant untrainable embedding''' imported from the 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will impair the final performance&lt;br /&gt;
                      due to information loss as the information flows through the network from&lt;br /&gt;
                      end to end. The decoder's smaller vocabulary size compared to the encoder's&lt;br /&gt;
                      demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a map that solves meaning shift using the 2nd&lt;br /&gt;
                      translator, but whether the map is learned or not is obscured by the smaller&lt;br /&gt;
                      vocabulary size.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM model and the GRU model&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size and will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the development set but badly on the test data; I want to figure out why.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (previously 500)&lt;br /&gt;
  emb_size = 510 (previously 310)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder model without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (previously 0.001)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, which means the final loss = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why the double-decoder without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training process) and the beam-search mechanism (decoding process)&lt;br /&gt;
  propagates and amplifies the error toward the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing such as synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because semantics are not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my PC and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves training time, has lower complexity, and gets better BLEU than before&lt;br /&gt;
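For the record, the kind of renames such an upgrade usually involves (an illustrative sketch of the public 1.0 API changes, not the actual ViVi_NMT diff):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  a = tf.constant([[1.0, 2.0]]); b = tf.constant([[3.0, 4.0]])&lt;br /&gt;
  merged = tf.concat([a, b], axis=1)           # 0.x: tf.concat(1, [a, b])&lt;br /&gt;
  prod = tf.multiply(a, b)                     # 0.x: tf.mul(a, b)&lt;br /&gt;
  cell = tf.contrib.rnn.GRUCell(500)           # 0.x: tf.nn.rnn_cell.GRUCell(500)&lt;br /&gt;
  init_op = tf.global_variables_initializer()  # 0.x: tf.initialize_all_variables()&lt;br /&gt;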
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran two versions of the code on the small data set (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has lower complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good even when the model is overfitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on the small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran two versions of the code on the big data set (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation in the tensorflow0.1 version leads to exhaustion of memory&lt;br /&gt;
* I've tried many times, and version 0.1 eventually worked&lt;br /&gt;
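One setting worth checking in such OOM cases is on-demand GPU memory growth; a sketch with the 1.0-style session config (whether the 0.1-era code exposes the same option is an assumption):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  config = tf.ConfigProto()&lt;br /&gt;
  config.gpu_options.allow_growth = True   # grow GPU memory on demand instead of grabbing it all&lt;br /&gt;
  sess = tf.Session(config=config)&lt;br /&gt;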
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation quality is poor&lt;br /&gt;
* reason: no data preprocessing was done (a minimal sketch follows)&lt;br /&gt;
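A minimal sketch of the kind of preprocessing the raw run was missing, tokenizing punctuation and lowercasing (a crude stand-in for the usual Moses scripts; preprocess is a hypothetical helper):&lt;br /&gt;
  import re&lt;br /&gt;
  def preprocess(line):&lt;br /&gt;
      line = line.strip().lower()&lt;br /&gt;
      line = re.sub(r'([.,!?:;()])', r' \1 ', line)   # split punctuation off words&lt;br /&gt;
      return re.sub(r'\s+', ' ', line).strip()&lt;br /&gt;
  print(preprocess('Hello, world!'))   # prints: hello , world !&lt;br /&gt;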
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU (toy example below).&lt;br /&gt;
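As a toy check of the metric, NLTK's sentence-level BLEU can score a single pair (assuming nltk is installed; the experiments in this log use corpus-level scoring):&lt;br /&gt;
  from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction&lt;br /&gt;
  ref = [ ['the', 'cat', 'sat', 'on', 'the', 'mat'] ]   # list of reference token lists&lt;br /&gt;
  hyp = ['the', 'cat', 'sat', 'on', 'a', 'mat']&lt;br /&gt;
  score = sentence_bleu(ref, hyp, smoothing_function=SmoothingFunction().method1)&lt;br /&gt;
  print(round(100 * score, 2))&lt;br /&gt;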
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient language text into single words (sketch below)&lt;br /&gt;
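A small sketch of that split, treating each classical-Chinese word as a single character as the 08/9 entry does (char_tokenize is a hypothetical helper):&lt;br /&gt;
  def char_tokenize(line):&lt;br /&gt;
      # space-separate every character; punctuation stays as its own token&lt;br /&gt;
      return ' '.join(ch for ch in line.strip() if not ch.isspace())&lt;br /&gt;
  print(char_tokenize('神大用则竭，形大劳则敝。'))&lt;br /&gt;
  # prints: 神 大 用 则 竭 ， 形 大 劳 则 敝 。&lt;br /&gt;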
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search for new data (Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run the model on data whose ancient text was split into single characters.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process the Songshu data&lt;br /&gt;
* read papers on CNNs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four Shi)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run the model with data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. With data from Zizhitongjian only (6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2. With data from Zizhitongjian only (12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3. With data from Shiji and Zizhitongjian (430,000 pairs), we can get BLEU of about 9.&lt;br /&gt;
*4. With data from Shiji and Zizhitongjian (430,000 pairs), with the ancient language text split character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (both the number of sentence pairs and their quality), since the modern language text includes context information.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-12T15:34:00Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at University of Sydney&lt;br /&gt;
* '''Shiyao Li (李诗瑶)'''   :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person !! Start !! Leave !! Hours !! Status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, containing 22000 and 22017 sentences respectively&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to generate the 2nd translator's&lt;br /&gt;
                     training data we use the model at 3000 updates, whose BLEU is 34.96, to&lt;br /&gt;
                     prevent overfitting)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using the same training data as the 1st translator or different data&lt;br /&gt;
                     won't influence the 2nd translator's performance; if anything, using the same&lt;br /&gt;
                     data may be better, at least judging from these results. But I have to account&lt;br /&gt;
                     for the smaller training data size compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                      performance due to information loss as the information flows through the network&lt;br /&gt;
                      from end to end. The decoder's smaller vocabulary size compared to the encoder's&lt;br /&gt;
                      demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a map that solves meaning shift using the 2nd&lt;br /&gt;
                      translator, but whether that map is learned or not is obscured by the smaller&lt;br /&gt;
                      vocab-size phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM and GRU models&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size; will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the dev set but badly on the test data; I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, which means the final loss = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss (see the sketch at the end of this entry)&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason the double-decoder without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training) and the beam-search mechanism (decoding)&lt;br /&gt;
  propagates and amplifies errors toward the output end, which breaks the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
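A sketch of the joint loss from experiment 2 above, final loss = 1st decoder's loss + 2nd decoder's loss (PyTorch cross-entropy over (batch, time, vocab) logits; the names are hypothetical):&lt;br /&gt;
  import torch.nn.functional as F&lt;br /&gt;
  def joint_loss(logits_dec1, logits_dec2, target_ids):&lt;br /&gt;
      # both decoders are trained against the same reference sequence;&lt;br /&gt;
      # logits are (batch, time, vocab), targets are (batch, time)&lt;br /&gt;
      l1 = F.cross_entropy(logits_dec1.transpose(1, 2), target_ids)&lt;br /&gt;
      l2 = F.cross_entropy(logits_dec2.transpose(1, 2), target_ids)&lt;br /&gt;
      return l1 + l2&lt;br /&gt;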
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
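The weighted mixes above amount to combining two attention read-outs; a small numpy sketch (attend and the shapes are illustrative assumptions):&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  def softmax(x):&lt;br /&gt;
      e = np.exp(x - x.max())&lt;br /&gt;
      return e / e.sum()&lt;br /&gt;
  def attend(query, keys, values):&lt;br /&gt;
      return softmax(keys @ query) @ values   # dot-product attention read-out&lt;br /&gt;
  q = np.random.randn(8)&lt;br /&gt;
  K1, V1 = np.random.randn(5, 8), np.random.randn(5, 8)   # Chinese encoder states&lt;br /&gt;
  K2, V2 = np.random.randn(7, 8), np.random.randn(7, 8)   # machine-translated-English encoder states&lt;br /&gt;
  final_attn = (4/3) * attend(q, K1, V1) + (2/3) * attend(q, K2, V2)&lt;br /&gt;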
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, and training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable (sketch below).&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Prepared the big data.&lt;br /&gt;
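A sketch of that constant-embedding setup, freezing only the encoder's English embedding while everything else trains (PyTorch; the sizes and names are assumptions):&lt;br /&gt;
  import torch, torch.nn as nn&lt;br /&gt;
  pretrained = torch.randn(30000, 310)        # stands in for the 1st translator's embedding&lt;br /&gt;
  enc_emb = nn.Embedding(30000, 310)&lt;br /&gt;
  enc_emb.weight.data.copy_(pretrained)&lt;br /&gt;
  enc_emb.weight.requires_grad = False        # constant: excluded from the update&lt;br /&gt;
  rest = nn.GRU(310, 500, batch_first=True)   # all other parameters stay trainable&lt;br /&gt;
  trainable = [p for p in list(enc_emb.parameters()) + list(rest.parameters()) if p.requires_grad]&lt;br /&gt;
  optimizer = torch.optim.Adam(trainable, lr=0.001)&lt;br /&gt;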
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing such as synonym substitution, but it didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because the language model does not capture semantics&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are encoded into a fixed-length vector by the encoder, and the decoder generates the output sequence from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves training time, has lower complexity, and gets better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran two versions of the code on the small data set (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has lower complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good even when the model is overfitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on the small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* ran two versions of the code on the big data set (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation in the tensorflow0.1 version leads to exhaustion of memory&lt;br /&gt;
* I've tried many times, and version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation quality is poor&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient language text into single words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search for new data (Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run the model on data whose ancient text was split into single characters.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process the Songshu data&lt;br /&gt;
* read papers on CNNs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four Shi)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run the model with data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1. With data from Zizhitongjian only (6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2. With data from Zizhitongjian only (12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3. With data from Shiji and Zizhitongjian (430,000 pairs), we can get BLEU of about 9.&lt;br /&gt;
*4. With data from Shiji and Zizhitongjian (430,000 pairs), with the ancient language text split character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (both the number of sentence pairs and their quality), since the modern language text includes context information.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-12T15:31:27Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  Youku&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  Turing Robot&lt;br /&gt;
* '''Xi Ma (马习)'''         :  graduate student at Tsinghua University&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate at University of California, Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate at University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate at University of Sydney&lt;br /&gt;
* '''Shiyao Li (李诗瑶)'''   :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person !! Start !! Leave !! Hours !! Status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, containing 22000 and 22017 sentences respectively&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but to generate the 2nd translator's&lt;br /&gt;
                     training data we use the model at 3000 updates, whose BLEU is 34.96, to&lt;br /&gt;
                     prevent overfitting)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using the same training data as the 1st translator or different data&lt;br /&gt;
                     won't influence the 2nd translator's performance; if anything, using the same&lt;br /&gt;
                     data may be better, at least judging from these results. But I have to account&lt;br /&gt;
                     for the smaller training data size compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final&lt;br /&gt;
                      performance due to information loss as the information flows through the network&lt;br /&gt;
                      from end to end. The decoder's smaller vocabulary size compared to the encoder's&lt;br /&gt;
                      demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is to look for a map that solves meaning shift using the 2nd&lt;br /&gt;
                      translator, but whether that map is learned or not is obscured by the smaller&lt;br /&gt;
                      vocab-size phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM and GRU models&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size; will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the dev set but badly on the test data; I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, which means the final loss = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason the double-decoder without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  the teacher-forcing mechanism (training) and the beam-search mechanism (decoding)&lt;br /&gt;
  propagates and amplifies errors toward the output end, which breaks the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
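*a minimal NumPy sketch of the decode-time attention mixes above (hypothetical 500-dim context vectors):&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  attn_1 = np.random.rand(500)  # context from the Chinese encoder (hypothetical)&lt;br /&gt;
  attn_2 = np.random.rand(500)  # context from the MT-English encoder (hypothetical)&lt;br /&gt;
  final_equal   = attn_1 + attn_2                            # the 43.50 setting&lt;br /&gt;
  final_more_en = (2.0 / 3) * attn_1 + (4.0 / 3) * attn_2    # the 41.22 setting&lt;br /&gt;
  final_more_zh = (4.0 / 3) * attn_1 + (2.0 / 3) * attn_2    # the 43.58 setting&lt;br /&gt;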
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable (see the sketch below).&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Prepared the large data set.&lt;br /&gt;
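*a minimal TensorFlow 1.x sketch of the constant vs. trainable embedding settings above (the pretrained matrix is a hypothetical stand-in for the 1st translator's embedding):&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  pretrained = np.random.rand(6000, 310).astype('float32')  # hypothetical&lt;br /&gt;
  # constant: initialized from the 1st translator and never updated&lt;br /&gt;
  emb_const = tf.get_variable('emb_const', initializer=pretrained, trainable=False)&lt;br /&gt;
  # finetuned: same initialization, but gradients may update it&lt;br /&gt;
  emb_tuned = tf.get_variable('emb_tuned', initializer=pretrained, trainable=True)&lt;br /&gt;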
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and concat(attn_1, attn_2)&lt;br /&gt;
* the best result with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing via synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because semantics were not captured&lt;br /&gt;
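*a minimal sketch of ranking substitution candidates with a language model (Keras style; vocabulary, ids, and candidates are hypothetical); a pure LM ranks only by fluency, which is why semantics is missed:&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  # tiny GRU language model over a hypothetical 10k-word vocabulary&lt;br /&gt;
  lm = tf.keras.Sequential([&lt;br /&gt;
      tf.keras.layers.Embedding(10000, 128),&lt;br /&gt;
      tf.keras.layers.GRU(256),&lt;br /&gt;
      tf.keras.layers.Dense(10000, activation='softmax'),&lt;br /&gt;
  ])&lt;br /&gt;
  context_ids = np.array([[5, 42, 7]])  # hypothetical tokens before the slot&lt;br /&gt;
  candidate_ids = [101, 202, 303]       # hypothetical synonym candidates&lt;br /&gt;
  probs = lm.predict(context_ids)       # next-word distribution, shape (1, 10000)&lt;br /&gt;
  best = candidate_ids[probs[0, candidate_ids].argmax()]  # most fluent candidate&lt;br /&gt;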
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* An encoder stores the semantics in a fixed-length vector, and a decoder generates the output sequence from this vector&lt;br /&gt;
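*a minimal Keras-style sketch of this encoder-decoder idea (hypothetical vocabulary and layer sizes):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  # encoder: compress the source into one fixed-length state vector&lt;br /&gt;
  enc_in = tf.keras.Input(shape=(None,))&lt;br /&gt;
  enc_emb = tf.keras.layers.Embedding(8000, 128)(enc_in)&lt;br /&gt;
  _, state = tf.keras.layers.GRU(256, return_state=True)(enc_emb)&lt;br /&gt;
  # decoder: generate the target sequence conditioned on that vector&lt;br /&gt;
  dec_in = tf.keras.Input(shape=(None,))&lt;br /&gt;
  dec_emb = tf.keras.layers.Embedding(8000, 128)(dec_in)&lt;br /&gt;
  dec_out = tf.keras.layers.GRU(256, return_sequences=True)(dec_emb, initial_state=state)&lt;br /&gt;
  logits = tf.keras.layers.Dense(8000)(dec_out)&lt;br /&gt;
  model = tf.keras.Model([enc_in, dec_in], logits)&lt;br /&gt;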
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version trains faster, has lower complexity, and achieves better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested the checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found that version 1.0 of the code saves training time and has lower complexity, and that the two versions achieve similar BLEU values&lt;br /&gt;
* found that the BLEU is still good even when the model is overfitting&lt;br /&gt;
* reason: on the small data set, the test set and training set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation in tensorflow0.1 exhausts memory&lt;br /&gt;
* after several retries, version 0.1 eventually ran as well&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code was 0.06 seconds per step faster than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* ran the code on the English-French data set and found the translation quality is poor&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split the ancient-language text into single words&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on data in which the ancient text was split into single characters (sketch below).&lt;br /&gt;
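*a one-line sketch of the character split (standard Python; the example line is from the test set below):&lt;br /&gt;
  line = '神大用则竭'               # an ancient-text line (example)&lt;br /&gt;
  chars = ' '.join(list(line))     # '神 大 用 则 竭'&lt;br /&gt;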
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers on CNNs &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learned about the graphical model of LSTM-projected BPTT&lt;br /&gt;
* searched for data available for translation (the Twenty-Four Histories)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* ran the model on data including Shiji and Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU: 11.11&lt;br /&gt;
Examples are as follows.&lt;br /&gt;
source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
trans: 当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城。&lt;br /&gt;
       &lt;br /&gt;
&lt;br /&gt;
source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
Conclusion:&lt;br /&gt;
1. Using Zizhitongjian only (6,000 pairs), we get BLEU 6 at most.&lt;br /&gt;
2. Using Zizhitongjian only (12,000 pairs), we get BLEU 7 at most.&lt;br /&gt;
3. Using Shiji and Zizhitongjian (430,000 pairs), we get BLEU about 9.&lt;br /&gt;
4. Using Shiji and Zizhitongjian (430,000 pairs), with the ancient text split character by character, we get BLEU 11.11 at most.&lt;br /&gt;
The main limiting factor now is the data: both the number of sentence pairs and their quality, since the modern-language text includes context information.&lt;br /&gt;
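*a minimal sketch of the character-level BLEU evaluation (NLTK's corpus_bleu; the sentences here are just the single pair shown above, whereas real scoring uses the whole test set):&lt;br /&gt;
  from nltk.translate.bleu_score import corpus_bleu&lt;br /&gt;
  refs = [[list('精神过度使用就会衰竭')]]   # reference, as character tokens&lt;br /&gt;
  hyps = [list('精神过度就可衰竭')]        # model output, as character tokens&lt;br /&gt;
  print(100 * corpus_bleu(refs, hyps))    # BLEU in percent, cf. 11.11 above&lt;br /&gt;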
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-12T14:35:56Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 seperately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learne lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* That only make English word embedding at encoder constant and train all the other embedding and parameters achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of English embedding at encoder plays an pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by a encoder and a decoder generate sequences on this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code save more training time, has less complexity and these two version of the code has a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-s&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-12T14:31:28Z</updated>
		
		<summary type="html">&lt;p&gt;Guojiayu：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, with 22000 and 22017 sentences respectively&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (the model at 4750 updates; to avoid overfitting we use the model at 3000 updates, whose BLEU is 34.96, to generate the 2nd translator's training data)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using the same training data as the 1st translator or different data makes little difference to the 2nd translator's performance; if anything, using the same data may be better, judging from these results. But I also have to account for the smaller training set compared with yesterday's model.&lt;br /&gt;
*code the 2nd translator with constant embeddings&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses a '''constant, untrainable embedding''' imported from the 1st translator's decoder (see the sketch below)&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will impair the final performance, due to information loss as information flows end to end through the network. The decoder's smaller vocabulary compared with the encoder's demonstrates this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment was to find a mapping that fixes meaning shift using the 2nd translator, but whether such a mapping is learned is obscured by the smaller-vocabulary phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
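*A minimal sketch of the constant-embedding trick above (TensorFlow 1.x assumed; decoder_emb.npy is a hypothetical dump of the 1st translator's decoder embedding):&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  # load the 1st translator's decoder embedding and freeze it in the 2nd translator&lt;br /&gt;
  pretrained = np.load('decoder_emb.npy').astype(np.float32)  # shape: [vocab_size, emb_size]&lt;br /&gt;
  embedding = tf.get_variable('embedding', initializer=pretrained, trainable=False)  # constant, untrainable&lt;br /&gt;
  token_ids = tf.placeholder(tf.int32, [None, None])  # a batch of token ids&lt;br /&gt;
  inputs = tf.nn.embedding_lookup(embedding, token_ids)&lt;br /&gt;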
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM and GRU models&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size; will try it after NIPS&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the development set but badly on the test set; I want to figure out the reason&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (previously 500)&lt;br /&gt;
  emb_size = 510 (previously 310)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder model without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (previously 0.001)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, i.e. final loss = 1st decoder's loss + 2nd decoder's loss (see the sketch below)&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than the 2nd decoder's. The reason may be that the second decoder only learns from the first decoder's hidden states, since their states are almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason the double-decoder model without joint loss generalizes so badly is the gap between teacher forcing (the training process) and beam search (the decoding process): the mismatch propagates and amplifies errors toward the output end, which breaks the model at decoding time.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
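*A minimal sketch of the joint loss above (TensorFlow 1.x assumed; the logits and targets are placeholders standing in for the two decoders' per-step outputs and the reference token ids):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  # per-step outputs of the two decoders and the shared reference targets&lt;br /&gt;
  # (placeholders here; in the real model these come from the decoder RNNs)&lt;br /&gt;
  logits_1 = tf.placeholder(tf.float32, [None, None, 6000])&lt;br /&gt;
  logits_2 = tf.placeholder(tf.float32, [None, None, 6000])&lt;br /&gt;
  targets = tf.placeholder(tf.int32, [None, None])&lt;br /&gt;
  loss_1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_1, labels=targets))&lt;br /&gt;
  loss_2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_2, labels=targets))&lt;br /&gt;
  final_loss = loss_1 + loss_2  # joint loss = 1st decoder's loss + 2nd decoder's loss&lt;br /&gt;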
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write documentation for the tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write documentation for the tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention mechanisms&lt;br /&gt;
  '''final_attn = attn_1 + attn_2''' (see the sketch below)&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
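*A minimal sketch of how the two attention contexts are combined (placeholder tensors; attn_1 and attn_2 stand for the context vectors from the Chinese and machine-translated-English encoders):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  attn_1 = tf.placeholder(tf.float32, [None, 500])  # context from the Chinese encoder&lt;br /&gt;
  attn_2 = tf.placeholder(tf.float32, [None, 500])  # context from the translated-English encoder&lt;br /&gt;
  final_attn = attn_1 + attn_2&lt;br /&gt;
  # weighted variants tried at decoding time:&lt;br /&gt;
  # final_attn = (2.0/3.0) * attn_1 + (4.0/3.0) * attn_2&lt;br /&gt;
  # final_attn = (4.0/3.0) * attn_1 + (2.0/3.0) * attn_2&lt;br /&gt;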
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write documentation for the tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Keeping only the encoder's English word embedding constant while training all the other embeddings and parameters achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of the big data set. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Making only the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initializing the English encoder's embedding and then finetuning it -- 46.06&lt;br /&gt;
* Sharing the attention mechanism and directly adding the two contexts -- 46.20&lt;br /&gt;
* Ran the double-attention model on the large data set&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on the large data set is 30.83 with a '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and with concat(attn_1, attn_2)&lt;br /&gt;
* The best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing-style synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because the semantics are not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to tensorflow1.0 (see the sketch below)&lt;br /&gt;
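*For context, a typical change in that 0.x-to-1.0 migration is the flipped argument order of tf.concat (a known API change, shown here only as a sketch with placeholder tensors):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  state_fw = tf.placeholder(tf.float32, [None, 500])&lt;br /&gt;
  state_bw = tf.placeholder(tf.float32, [None, 500])&lt;br /&gt;
  # TensorFlow 0.x style (old):&lt;br /&gt;
  #   merged = tf.concat(1, [state_fw, state_bw])&lt;br /&gt;
  # TensorFlow 1.0 style (new): values first, axis as a keyword&lt;br /&gt;
  merged = tf.concat([state_fw, state_bw], axis=1)&lt;br /&gt;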
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector (see the sketch below)&lt;br /&gt;
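*A minimal sketch of that encoder-decoder idea (TensorFlow 1.x assumed; the embedded inputs and vocab size are placeholders):&lt;br /&gt;
  import tensorflow as tf&lt;br /&gt;
  src_embedded = tf.placeholder(tf.float32, [None, None, 310])  # embedded source sequence&lt;br /&gt;
  tgt_embedded = tf.placeholder(tf.float32, [None, None, 310])  # embedded (shifted) target sequence&lt;br /&gt;
  # encoder: compress the source sequence into one fixed-length state vector&lt;br /&gt;
  enc_cell = tf.nn.rnn_cell.GRUCell(512)&lt;br /&gt;
  _, enc_state = tf.nn.dynamic_rnn(enc_cell, src_embedded, dtype=tf.float32, scope='encoder')&lt;br /&gt;
  # decoder: generate the target sequence conditioned on that single vector&lt;br /&gt;
  dec_cell = tf.nn.rnn_cell.GRUCell(512)&lt;br /&gt;
  dec_outputs, _ = tf.nn.dynamic_rnn(dec_cell, tgt_embedded, initial_state=enc_state, scope='decoder')&lt;br /&gt;
  logits = tf.layers.dense(dec_outputs, 6000)  # per-step word scores over the output vocab&lt;br /&gt;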
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my PC and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq models without and with attention didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves training time, has lower complexity, and gets better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested the checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found that version 1.0 of the code saves training time and has lower complexity, and that the two versions reach similar BLEU values&lt;br /&gt;
* found that the BLEU is still good even when the model is overfitting&lt;br /&gt;
* reason: on the small data set, the test set and training set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation in the tensorflow0.1 version exhausts memory&lt;br /&gt;
* after trying many times, version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saves 0.06 seconds per step compared with the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translations are not good&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on BLEU.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers on the attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient-language text into single words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* searched for new data (Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* processed documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* ran the model on the data in which the ancient text was split into single characters.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* processed the Songshu data&lt;br /&gt;
* read papers on CNNs &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>

*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, with 22000 and 22017 examples respectively&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates; to generate the 2nd translator's training&lt;br /&gt;
                      data we instead use the model at 3000 updates, whose BLEU is 34.96, to avoid&lt;br /&gt;
                      overfitting)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that whether the 2nd translator uses the same training data as the 1st or&lt;br /&gt;
                      different data won't much influence the 2nd translator's performance; if anything, the&lt;br /&gt;
                      same data may be better, at least judging from these results. But the smaller training&lt;br /&gt;
                      set compared with yesterday's model must also be taken into account.&lt;br /&gt;
*code 2nd translator with constant embeddings&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embeddings''' imported from the 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series (cascade) model inevitably impairs the final&lt;br /&gt;
                      performance due to information loss as information flows through the network from&lt;br /&gt;
                      end to end. The decoder's smaller vocabulary compared with the encoder's&lt;br /&gt;
                      (9000+ -&amp;gt; 6000+) demonstrates this.&lt;br /&gt;
  The intention of this experiment was to find a mapping that fixes meaning shift using the 2nd&lt;br /&gt;
                      translator, but whether that mapping is learned is obscured by the smaller-vocabulary&lt;br /&gt;
                      effect.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation papers &lt;br /&gt;
*learned the LSTM and seq2seq models &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embeddings'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understood the difference between the LSTM and GRU models&lt;br /&gt;
* read the implementation code of the seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* altered the 2017/05/14 model's size; will try it after the NIPS deadline&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* trained the double-decoder model on the small data set but encountered decoding bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on the development set but badly on the test set; I want to figure out why.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 previously)&lt;br /&gt;
  emb_size = 510 (310 previously)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline; the other results are mostly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*the double-decoder without joint loss generalizes very badly&lt;br /&gt;
*I'm trying the double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 previously)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data concat(Chinese, machine-translated English), &lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than the baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss, meaning final loss = 1st decoder's loss + 2nd decoder's loss&lt;br /&gt;
  (see the joint-loss sketch after the Daily Report table)&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than the 2nd decoder's. The reason may be that&lt;br /&gt;
  the second decoder learns only from the first decoder's hidden states, because their states are&lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason the double-decoder without joint loss generalizes so badly is that the gap between&lt;br /&gt;
  teacher forcing (the training process) and beam search (the decoding process) propagates and&lt;br /&gt;
  amplifies errors toward the output end, which breaks the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train the double-decoder model without joint loss but with beam search on the 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention mechanisms&lt;br /&gt;
  '''final_attn = attn_1 + attn_2''' (see the double-attention sketch after the Daily Report table)&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine-translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention mechanisms&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''randomly initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''', and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embeddings'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embeddings at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embeddings at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of the big data set. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Making only the English encoder's embeddings constant -- 45.98&lt;br /&gt;
* Only initializing the English encoder's embeddings and then fine-tuning them -- 46.06&lt;br /&gt;
* Sharing the attention mechanism and directly adding the two contexts -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline BLEU on large data is 30.83 with a '''30000'''-word output vocab&lt;br /&gt;
* Our best result is 31.53 with a '''20000'''-word output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Trained the model with batch size 40 and concat(attn_1, attn_2)&lt;br /&gt;
* the best result of the model with batch size 40 and add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonym substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post-editing via synonym substitution, but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar words&lt;br /&gt;
* This didn't work because semantics are not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by the encoder, and the decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to tensorflow1.0 (on the server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my PC and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to tensorflow1.0 (on the server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade the code to tensorflow1.0 (on the server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity, and gets a better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found that version 1.0 of the code saves training time and has lower complexity, and that the two versions achieve similar BLEU values&lt;br /&gt;
* found that BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: on the small data set, the test set and training set are similar in content and style&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out-of-memory (OOM) error occurred when version 0.1 of the code was trained on the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper resource allocation by the tensorflow0.1 version exhausts memory&lt;br /&gt;
* after trying many times, version 0.1 eventually worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code is about 0.06 seconds per step faster than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* ran the code on the English-French data set and found the translation quality is poor&lt;br /&gt;
* reason: no data preprocessing was done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient-language text into single words&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/01&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/02&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/03&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/04&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search for new data (Songshu)&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
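&lt;br /&gt;
'''Sketch: double-attention combination.''' The double-attention experiments above (2017/05/28 -- 2017/06/03) combine one attention context over the Chinese source with a second context over the 1st translator's machine-translated English, as final_attn = w1*attn_1 + w2*attn_2. Below is a minimal NumPy sketch of that combination only; the dot-product attention, names, and sizes are illustrative assumptions, not the actual tf_translate code.&lt;br /&gt;
&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  &lt;br /&gt;
  def softmax(x):&lt;br /&gt;
      e = np.exp(x - x.max())&lt;br /&gt;
      return e / e.sum()&lt;br /&gt;
  &lt;br /&gt;
  def attention(dec_state, enc_states):&lt;br /&gt;
      # dot-product attention: weights over encoder states, then a context vector&lt;br /&gt;
      weights = softmax(enc_states @ dec_state)&lt;br /&gt;
      return weights @ enc_states&lt;br /&gt;
  &lt;br /&gt;
  hidden = 500                                  # hidden_size used in the small-data runs&lt;br /&gt;
  rng = np.random.default_rng(0)&lt;br /&gt;
  zh_states = rng.normal(size=(7, hidden))      # Chinese encoder outputs (toy length 7)&lt;br /&gt;
  en_states = rng.normal(size=(9, hidden))      # MT-English encoder outputs (toy length 9)&lt;br /&gt;
  dec_state = rng.normal(size=(hidden,))        # current decoder state&lt;br /&gt;
  &lt;br /&gt;
  attn_1 = attention(dec_state, zh_states)&lt;br /&gt;
  attn_2 = attention(dec_state, en_states)&lt;br /&gt;
  w1, w2 = 1.0, 1.0                             # the log also tried 4/3 vs 2/3 weightings&lt;br /&gt;
  final_attn = w1 * attn_1 + w2 * attn_2        # fed to the decoder, per the log's formula&lt;br /&gt;
  print(final_attn.shape)                       # (500,)&lt;br /&gt;
&lt;br /&gt;
Per the log, the equal-weight variant with constant embeddings was the most stable small-data setting (BLEU 45.41, and 46.20 with a shared attention mechanism).&lt;br /&gt;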
&lt;br /&gt;
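'''Sketch: double-decoder joint loss.''' The 2017/05/23 entry defines the joint loss as final loss = 1st decoder's loss + 2nd decoder's loss, and its DISCOVERY note blames the poor generalization of the no-joint-loss variant on the gap between teacher forcing at training time and beam search at decoding time. The following is a hedged NumPy illustration of the joint-loss arithmetic only; the random logit matrices stand in for two teacher-forced decoders and are not the project's real model.&lt;br /&gt;
&lt;br /&gt;
  import numpy as np&lt;br /&gt;
  &lt;br /&gt;
  def cross_entropy(logits, targets):&lt;br /&gt;
      # mean token-level cross-entropy for a (T, vocab) logit matrix&lt;br /&gt;
      logits = logits - logits.max(axis=1, keepdims=True)&lt;br /&gt;
      log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))&lt;br /&gt;
      return -log_probs[np.arange(len(targets)), targets].mean()&lt;br /&gt;
  &lt;br /&gt;
  vocab, T = 6000, 5                            # toy sizes (decoder vocab was ~6000+ per the log)&lt;br /&gt;
  rng = np.random.default_rng(1)&lt;br /&gt;
  logits_dec1 = rng.normal(size=(T, vocab))     # 1st (teacher-forced) decoder outputs&lt;br /&gt;
  logits_dec2 = rng.normal(size=(T, vocab))     # 2nd decoder outputs&lt;br /&gt;
  targets = rng.integers(0, vocab, size=T)      # gold token ids&lt;br /&gt;
  &lt;br /&gt;
  # joint objective from the log: final_loss = loss_1 + loss_2&lt;br /&gt;
  final_loss = cross_entropy(logits_dec1, targets) + cross_entropy(logits_dec2, targets)&lt;br /&gt;
  print(round(final_loss, 3))&lt;br /&gt;
&lt;br /&gt;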
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Guojiayu</name></author>	</entry>


	</feed>