[{"data":1,"prerenderedAt":416},["ShallowReactive",2],{"\u002Fcn\u002Fmemos_cloud\u002Fintroduction\u002Falgorithm":3,"surround-\u002Fcn\u002Fmemos_cloud\u002Fintroduction\u002Falgorithm":400},{"id":4,"title":5,"avatar":6,"banner":6,"body":7,"category":6,"desc":118,"description":382,"extension":394,"links":6,"meta":395,"navigation":6,"path":396,"seo":397,"stem":398,"__hash__":399},"docs\u002Fcn\u002Fmemos_cloud\u002Fintroduction\u002Falgorithm.md","MemOS算法原理概述",null,{"type":8,"value":9,"toc":381},"minimark",[10,27,29,34,37,50,58,60,73,93,95,98,109,111,119,121,125,139,146,163,193,195,212,214,236,249,251,255,261,289,295,303,305,309,314,333,339,341,345,360,365,367,371],[11,12,13],"note",{},[14,15,16,17,20,21],"p",{},"提示",[18,19],"br",{},"论文地址：",[22,23,24],"a",{"href":24,"rel":25},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2507.03724",[26],"nofollow",[18,28],{},[30,31,33],"h2",{"id":32},"_1-什么是memos","1. 什么是 MemOS？",[14,35,36],{},"当下的大语言模型（LLM）已经展现出强大的生成和推理能力，但它们普遍缺乏真正的「记忆」。",[38,39,40,44,47],"ul",{},[41,42,43],"li",{},"在多轮对话中，它们常常遗忘早期信息；",[41,45,46],{},"在应用场景中，它们无法沉淀用户的个性化偏好；",[41,48,49],{},"在知识迭代时，它们更新缓慢，无法灵活应对新需求。",[14,51,52,53,57],{},"这使得 LLM 虽然“聪明”，却难以成为真正的 ",[54,55,56],"strong",{},"老师、同事或助手","。",[18,59],{},[14,61,62,65,66,68,69,72],{},[54,63,64],{},"MemOS（Memory Operating System）"," 正是为了解决这一根本性缺陷而提出。",[18,67],{},"\n它把「记忆」从一个零散的功能，提升为与算力同等重要的 ",[54,70,71],{},"系统资源","，为 LLM 提供：",[38,74,75,81,87],{},[41,76,77,80],{},[54,78,79],{},"统一的记忆层","：跨越单一对话，支撑长期知识沉淀和上下文管理；",[41,82,83,86],{},[54,84,85],{},"持久化与结构化能力","：让记忆能够被保存、追溯和复用；",[41,88,89,92],{},[54,90,91],{},"记忆增强推理","：在推理时调用历史经验和偏好，生成更符合用户需求的答案。",[18,94],{},[14,96,97],{},"相比传统的做法（如单纯依赖参数记忆或临时 KV 缓存），MemOS 的价值在于：",[38,99,100,103,106],{},[41,101,102],{},"它让 AI 不再是“看过就忘”，而是能持续进化和学习；",[41,104,105],{},"它不仅能回答当下的问题，更能利用过去的积累改善未来的表现；",[41,107,108],{},"它为开发者提供了统一接口，把“记忆”从复杂的自研逻辑变成标准化能力。",[18,110],{},[14,112,113,114,116],{},"简而言之，MemOS 
的目标是：",[18,115],{},[54,117,118],{},"让大模型从一次性对话工具，进化为真正具有长期记忆和自适应能力的智能体。",[18,120],{},[30,122,124],{"id":123},"_2-memos架构设计","2. MemOS架构设计",[14,126,127,128,131,132],{},"MemOS 的设计核心，是把「记忆」作为一个独立系统层，和计算、存储一样，成为 AI 应用的基础能力。它的整体架构可以概括为 ",[54,129,130],{},"三层结构","： ",[133,134,136],"span",{"style":135},"color: rgb(61, 170, 214);",[54,137,138],{},"API 与应用接口层、记忆调度与管理层、记忆存储与基础设施层",[14,140,141],{},[142,143],"img",{"alt":144,"src":145},"art.gif","https:\u002F\u002Fstatics.memtensor.com.cn\u002Fmemos\u002Fart.gif",[38,147,148],{},[41,149,150,151,156,157,162],{},"在 ",[133,152,153],{"style":135},[54,154,155],{},"API 与应用接口层","，MemOS 提供了标准化的 Memory API，开发者可以通过简单的接口实现",[133,158,159],{"style":135},[54,160,161],{},"记忆创建、删除、更新","等操作，让大模型具备易于调用和扩展的持久记忆能力，支持多轮对话、长期任务和跨会话个性化等复杂应用场景。",[164,165,166],"blockquote",{},[14,167,168,169,173,174,184,185,188,189,192],{},"这里的 ",[170,171,172],"code",{},"API 层"," 指的是框架内部的标准化接口设计，用于阐述系统原理与能力边界。",[133,175,177,183],{"style":176},"color: #ff7100;",[54,178,179],{},[180,181,182],"u",{},"不同于云服务对外提供的开发接口"," ","（如 ",[170,186,187],{},"add","、",[170,190,191],{},"search"," 等简化封装），后者是基于 MemOS 能力在后端抽象后的统一入口。",[18,194],{},[38,196,197],{},[41,198,199,200,205,206,211],{},"在记忆调度与管理层，MemOS 提出了",[133,201,202],{"style":135},[54,203,204],{},"记忆调度（Memory Scheduling）","的全新范式，支持基于上下文的 ",[133,207,208],{"style":135},[54,209,210],{},"“下一场景预测”（Next-Scene Prediction），","可以在模型生成时提前加载潜在需要的记忆片段，显著降低响应延迟、提升推理效率。",[18,213],{},[38,215,216],{},[41,217,218,219,224,225,230,231],{},"而在",[133,220,221],{"style":135},[54,222,223],{},"记忆存储与基础设施层，","MemOS 通过标准化的 ",[133,226,227],{"style":135},[54,228,229],{},"MemCube"," 封装，将明文记忆、激活记忆和参数记忆三种形态有机整合。它支持多种持久化存储方式，包括 Graph 数据库、向量数据库等，并具备",[133,232,233],{"style":135},[54,234,235],{},"跨模型的记忆迁移与复用能力。",[237,238,240,241,240,245],"figure",{"style":239},"width: fit-content","\n  ",[142,242],{"src":243,"alt":244},"https:\u002F\u002Fcdn.memtensor.com.cn\u002Fimg\u002F1758183504775_2ehjk2_compressed.png","标准化 
MemCube（记忆立方体）的基础构成",[246,247,244],"figcaption",{"style":248},"text-align: center;",[18,250],{},[30,252,254],{"id":253},"_3-memos为什么高效","3. MemOS为什么高效？",[11,256,258],{"icon":257},"ri:message-2-line",[14,259,260],{},"从Next-Token Prediction到Next-Scene Prediction",[38,262,263,270,273,280,286],{},[41,264,265,266,269],{},"在传统的大模型问答系统中，生成流程依然遵循",[54,267,268],{},"同步的Next-Token机制","：模型接收用户问题→实时检索外部片段→按token逐字生成答案。",[41,271,272],{},"检索或计算产生的任何停顿，都会直接拉长整条推理链路，知识注入与生成紧密耦合，导致GPU容易出现空等，用户端响应时延明显。",[41,274,275,276,279],{},"与这种传统范式不同，MemOS 从记忆建模的视角出发，提出了",[54,277,278],{},"记忆调度范式","，通过设计异步调度框架，提前预测模型可能需要的记忆信息，显著降低实时生成中的效率损耗。",[41,281,282,283,57],{},"MemOS 实现了针对MemCube中的三种核心记忆类型（参数记忆、激活记忆、明文记忆），以及外部知识库（包括互联网检索与超大规模本地知识）等多元知识的",[54,284,285],{},"联合调度",[41,287,288],{},"依托对对话轮次与时间差的精准感知，系统能够智能预测下一个场景中可能被调用的记忆内容，并动态路由与预加载所需的明文、参数和激活记忆，从而在生成阶段即刻命中，最大化信息引入的效率和推理的流畅性。",[14,290,291],{},[142,292],{"alt":293,"src":294},"640.gif","https:\u002F\u002Fstatics.memtensor.com.cn\u002Fmemos\u002Fani.gif",[237,296,240,297,240,301],{"style":239},[142,298],{"src":299,"alt":300},"https:\u002F\u002Fcdn.memtensor.com.cn\u002Fimg\u002F1758687680524_waiu4s_compressed.png","记忆调度的核心思路",[246,302,300],{"style":248},[18,304],{},[30,306,308],{"id":307},"_4-memos-preview版本性能详细评估结果","4. 
MemOS-Preview 版本性能详细评估结果",[310,311,313],"h3",{"id":312},"_41-locomo记忆评测","4.1 LoCoMo记忆评测",[38,315,316,323,326],{},[41,317,318,319,322],{},"为系统性验证MemOS在真实应用场景下的表现，MemOS团队基于",[54,320,321],{},"LoCoMo数据集","进行了全面评测。",[41,324,325],{},"作为当前业界广泛认可的记忆管理基准，LoCoMo已被多种主流框架采用，用于检验模型的记忆存取能力与多轮对话一致性。",[41,327,328,329,332],{},"从官方公开的评测数据来看，",[54,330,331],{},"MemOS在准确率和计算效率上均实现了显著提升","，相较于OpenAI的全局记忆方案，在关键指标上展现出更优的性能表现，进一步验证了其在记忆调度、管理与推理融合方面的技术领先性。",[14,334,335],{},[142,336],{"alt":337,"src":338},"image.png","https:\u002F\u002Fcdn.memtensor.com.cn\u002Fimg\u002F1758687655761_blkqnr_compressed.png",[18,340],{},[310,342,344],{"id":343},"_42-kvcache记忆评测","4.2 KV Cache记忆评测",[38,346,347,350,353],{},[41,348,349],{},"除了通用的记忆能力评估，研究团队还重点考察了MemOS所提出的KV Cache记忆机制在推理加速方面的实际效果。",[41,351,352],{},"通过在不同上下文长度（Short\u002FMedium\u002FLong）以及不同模型规模（8B\u002F32B\u002F72B）下进行对比测试，系统性评估了缓存构建时间（Build）、首Token响应时间（TTFT）以及整体加速比（Speedup）等关键指标。",[41,354,355,356,359],{},"实验结果（见下图）表明，",[54,357,358],{},"MemOS在多种配置下均显著优化了KV Cache的构建与复用效率","，使推理过程更加高效流畅，有效缩短了用户的等待时延，并在大规模模型场景中实现了可观的性能加速。",[14,361,362],{},[142,363],{"alt":337,"src":364},"https:\u002F\u002Fcdn.memtensor.com.cn\u002Fimg\u002F1758687596553_iptom0_compressed.png",[18,366],{},[30,368,370],{"id":369},"_5-下一步行动","5. 
下一步行动",[38,372,373],{},[41,374,375,376,380],{},"了解",[22,377,379],{"href":378},"\u002Fmemos_cloud\u002Fcloud_and_opensource","云平台与开源方案","，体验MemOS的强大之处吧！",{"title":382,"searchDepth":383,"depth":383,"links":384},"",2,[385,386,387,388,393],{"id":32,"depth":383,"text":33},{"id":123,"depth":383,"text":124},{"id":253,"depth":383,"text":254},{"id":307,"depth":383,"text":308,"children":389},[390,392],{"id":312,"depth":391,"text":313},3,{"id":343,"depth":391,"text":344},{"id":369,"depth":383,"text":370},"md",{},"\u002Fcn\u002Fmemos_cloud\u002Fintroduction\u002Falgorithm",{"title":5,"description":382},"cn\u002Fmemos_cloud\u002Fintroduction\u002Falgorithm","OcqF38mdV8Ro24Phf9kXxvFWYevi8mmS06VvogigoGg",[401,409],{"title":402,"path":403,"stem":404,"icon":405,"framework":6,"module":6,"class":406,"target":-1,"active":407,"defaultOpen":407,"children":-1,"description":408},"记忆生命周期管理","\u002Fcn\u002Fmemos_cloud\u002Fintroduction\u002Fmem_lifecycle","memos_cloud\u002Fintroduction\u002Fmem_lifecycle","i-ri-refresh-line",[],false,"在 MemOS 中，记忆并不是静态存放的，而是会随着时间和使用情况不断演化。",{"title":410,"path":411,"stem":412,"icon":413,"framework":6,"module":6,"class":414,"target":-1,"active":407,"defaultOpen":407,"children":-1,"description":415},"云平台与开源方案","\u002Fcn\u002Fmemos_cloud\u002Fcloud_and_opensource","memos_cloud\u002Fcloud_and_opensource","i-ri-dashboard-line",[],"选择最适合您需求的 MemOS “记忆”方案。",1774339745433]