<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
  <!--
    Channel-level metadata: feed title, site link, short summary,
    declared feed language, and the time this feed was last built.
    NOTE(review): the language is declared as "en", yet the channel also
    carries Chinese entries (the items whose links are under /zh/).
    Confirm whether a single mixed-language feed is intended.
  -->
  <title>Houyi AI Research Flow</title>
  <link>https://houyi.blog/</link>
  <description>AGI research learning notes and work explainers by Houyi Li.</description>
  <language>en</language>
  <lastBuildDate>Wed, 13 May 2026 03:05:15 GMT</lastBuildDate>
<item>
  <!-- English entry (link under /en/), slug moe-equal-resources-p1. -->
  <title>Can We Train an MoE Model with the Same Total Parameters and Performance as Dense?</title>
  <link>https://houyi.blog/en/moe-equal-resources-p1/</link>
  <!-- The guid repeats the link and is flagged as a permalink. -->
  <guid isPermaLink="true">https://houyi.blog/en/moe-equal-resources-p1/</guid>
  <description>An ICLR 2026 Oral paper explainer: MoE needs a more aggressive data scaling strategy.</description>
  <pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate>
  <author>lihouyi2013@hotmail.com (Houyi Li)</author>
  <!-- Tags: one category element per line, original order kept. -->
  <category>MoE</category>
  <category>Pretrain</category>
  <category>LLM</category>
  <category>ICLR 2026 oral</category>
  <category>Data Scaling</category>
  <category>Data Reuse</category>
</item>
<item>
  <!-- Chinese entry (link under /zh/), slug moe-equal-resources-p1. -->
  <title>能否训出和 Dense 总参相同、性能相同的 MoE 模型？</title>
  <link>https://houyi.blog/zh/moe-equal-resources-p1/</link>
  <!-- The guid repeats the link and is flagged as a permalink. -->
  <guid isPermaLink="true">https://houyi.blog/zh/moe-equal-resources-p1/</guid>
  <description>ICLR 2026 Oral Paper的解读：MoE 需要更加激进的 Data Scaling 策略</description>
  <pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate>
  <author>lihouyi2013@hotmail.com (Houyi Li)</author>
  <!-- Tags: one category element per line, original order kept. -->
  <category>MoE</category>
  <category>Pretrain</category>
  <category>LLM</category>
  <category>ICLR 2026 oral</category>
  <category>Data Scaling</category>
  <category>Data Reuse</category>
</item>
<item>
  <!-- English entry (link under /en/), slug moe-equal-resources-p2; titled as the appendix. -->
  <title>Appendix: Can We Train an MoE Model with the Same Total Parameters and Performance as Dense?</title>
  <link>https://houyi.blog/en/moe-equal-resources-p2/</link>
  <!-- The guid repeats the link and is flagged as a permalink. -->
  <guid isPermaLink="true">https://houyi.blog/en/moe-equal-resources-p2/</guid>
  <description>An ICLR 2026 Oral paper explainer: MoE needs a more aggressive data scaling strategy.</description>
  <pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate>
  <author>lihouyi2013@hotmail.com (Houyi Li)</author>
  <!-- Tags: one category element per line, original order kept. -->
  <category>MoE</category>
  <category>Pretrain</category>
  <category>LLM</category>
  <category>ICLR 2026 oral</category>
  <category>Data Scaling</category>
  <category>Data Reuse</category>
  <category>APPENDIX</category>
</item>
<item>
  <!-- Chinese entry (link under /zh/), slug moe-equal-resources-p2; titled as the appendix. -->
  <title>附录：能否训出和 Dense 总参相同、性能相同的 MoE 模型？</title>
  <link>https://houyi.blog/zh/moe-equal-resources-p2/</link>
  <!-- The guid repeats the link and is flagged as a permalink. -->
  <guid isPermaLink="true">https://houyi.blog/zh/moe-equal-resources-p2/</guid>
  <description>ICLR 2026 Oral Paper的解读：MoE 需要更加激进的 Data Scaling 策略</description>
  <pubDate>Fri, 08 May 2026 00:00:00 GMT</pubDate>
  <author>lihouyi2013@hotmail.com (Houyi Li)</author>
  <!-- Tags: one category element per line, original order kept. -->
  <category>MoE</category>
  <category>Pretrain</category>
  <category>LLM</category>
  <category>ICLR 2026 oral</category>
  <category>Data Scaling</category>
  <category>Data Reuse</category>
  <category>APPENDIX</category>
</item>
</channel>
</rss>