Memory OS

Python SDK

The official Python SDK for Memory OS provides a fully-typed client for storing, searching, and retrieving memories in your Python applications. Supports both synchronous and asynchronous operations.

Installation

Install the SDK using pip:

Bash
pip install memoryos

Or with poetry:

Bash
poetry add memoryos

Or with pipenv:

Bash
pipenv install memoryos

Requirements

  • Python 3.8 or higher
  • requests library (installed automatically)
  • aiohttp library (for async support, installed automatically)

Quick Start

Python
import os
from memoryos import MemoryOS

# Initialize the client
memory = MemoryOS(api_key=os.environ["MEMORY_OS_API_KEY"])

# Store a memory
new_memory = memory.memories.create(
    content="User prefers dark mode and Python",
    tier="long",
    content_type="fact",
    memory_nature="semantic"
)

# Search for memories
results = memory.search(
    query="What are the user's preferences?",
    threshold=0.7
)

# Get context for an LLM
context = memory.get_context(
    query="Help the user with their request",
    max_tokens=2000
)

print(context["context"])

Client Configuration

Constructor Parameters

Python
class MemoryOS:
    def __init__(
        self,
        api_key: str,
        base_url: str = "https://api.mymemoryos.com/v1",
        timeout: int = 30,
        max_retries: int = 3,
        headers: Optional[Dict[str, str]] = None
    ):
        ...
| Parameter | Type | Required | Default | Description |
|---|---|---|---|---|
| `api_key` | str | Yes | - | Your API key (starts with `mos_`) |
| `base_url` | str | No | `https://api.mymemoryos.com/v1` | API base URL |
| `timeout` | int | No | 30 | Request timeout in seconds |
| `max_retries` | int | No | 3 | Maximum retry attempts |
| `headers` | dict | No | None | Additional request headers |

Basic Configuration

Python
import os
from memoryos import MemoryOS

memory = MemoryOS(api_key=os.environ["MEMORY_OS_API_KEY"])

Advanced Configuration

Python
import os

from memoryos import MemoryOS

# All constructor parameters besides api_key are optional.
memory = MemoryOS(
    api_key=os.environ["MEMORY_OS_API_KEY"],
    base_url="https://api.mymemoryos.com/v1",
    timeout=60,        # seconds per request
    max_retries=5,     # retries for transient failures
    headers={"X-Custom-Header": "value"}
)

API Reference

memories.create()

Create a new memory.

Python
def create(
    self,
    content: str,
    tier: Literal["short", "medium", "long"] = "short",
    content_type: str = "text",
    memory_nature: Optional[Literal["episodic", "semantic"]] = None,
    parent_memory_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    external_id: Optional[str] = None,
    source_id: Optional[str] = None,
    importance_score: Optional[float] = None
) -> Dict[str, Any]:
    ...

Parameters:

| Parameter | Type | Required | Default | Description |
|---|---|---|---|---|
| `content` | str | Yes | - | Memory content |
| `tier` | str | No | `"short"` | Memory tier: `short`, `medium`, `long` |
| `content_type` | str | No | `"text"` | Content type |
| `memory_nature` | str | No | None | `episodic` or `semantic` |
| `parent_memory_id` | str | No | None | Parent memory UUID |
| `metadata` | dict | No | None | Custom metadata |
| `external_id` | str | No | None | Your external reference |
| `source_id` | str | No | None | Source identifier |
| `importance_score` | float | No | None | Importance (0-1) |

Returns: Memory dict

Example:

Python
import os

from memoryos import MemoryOS

memory_client = MemoryOS(api_key=os.environ["MEMORY_OS_API_KEY"])

# Create a basic memory
new_memory = memory_client.memories.create(
    content="User is a senior Python developer at DataCorp",
    tier="long",
    content_type="fact",
    memory_nature="semantic"
)

print(f"Created memory: {new_memory['id']}")

# Create with full options
detailed_memory = memory_client.memories.create(
    content="User prefers pandas over polars for data manipulation",
    tier="long",
    content_type="fact",
    memory_nature="semantic",
    importance_score=0.8,
    metadata={
        "source": "conversation",
        "user_id": "user_123",
        "topic": "data_tools",
        "verified": True
    }
)

memories.list()

Retrieve a paginated list of memories.

Python
def list(
    self,
    limit: int = 50,
    offset: int = 0,
    tier: Optional[Literal["short", "medium", "long"]] = None,
    content_type: Optional[str] = None,
    memory_nature: Optional[Literal["episodic", "semantic"]] = None,
    min_relevance: Optional[float] = None
) -> Dict[str, Any]:
    ...

Parameters:

| Parameter | Type | Required | Default | Description |
|---|---|---|---|---|
| `limit` | int | No | 50 | Max results (max 100) |
| `offset` | int | No | 0 | Pagination offset |
| `tier` | str | No | None | Filter by tier |
| `content_type` | str | No | None | Filter by content type |
| `memory_nature` | str | No | None | Filter by nature |
| `min_relevance` | float | No | None | Minimum relevance score |

Returns: Dict with data (list of memories) and meta (pagination info)

Example:

Python
# Get all long-term memories
result = memory_client.memories.list(tier="long", limit=50)

print(f"Found {result['meta']['total']} long-term memories")
for mem in result["data"]:
    print(f"- {mem['content'][:50]}...")

# Paginate through all memories
all_memories = []
offset = 0
limit = 50

while True:
    page = memory_client.memories.list(limit=limit, offset=offset)
    all_memories.extend(page["data"])

    if not page["meta"]["has_more"]:
        break

    offset += limit

print(f"Retrieved {len(all_memories)} total memories")

# Filter by relevance
relevant_memories = memory_client.memories.list(
    tier="long",
    min_relevance=0.7,
    limit=20
)

memories.get()

Retrieve a single memory by ID.

Python
def get(self, memory_id: str) -> Dict[str, Any]:
    ...

Parameters:

ParameterTypeRequiredDescription
memory_idstrYesMemory UUID

Returns: Memory dict

Example:

Python
try:
    mem = memory_client.memories.get("550e8400-e29b-41d4-a716-446655440000")
    print(f"Content: {mem['content']}")
    print(f"Tier: {mem['tier']}")
    print(f"Access count: {mem['access_count']}")
    print(f"Created: {mem['created_at']}")
except MemoryOSError as e:
    if e.code == "NOT_FOUND":
        print("Memory not found")
    else:
        raise

memories.update()

Update an existing memory.

Python
def update(
    self,
    memory_id: str,
    content: Optional[str] = None,
    tier: Optional[Literal["short", "medium", "long"]] = None,
    importance_score: Optional[float] = None,
    memory_nature: Optional[Literal["episodic", "semantic"]] = None,
    metadata: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    ...

Parameters:

| Parameter | Type | Required | Description |
|---|---|---|---|
| `memory_id` | str | Yes | Memory UUID |
| `content` | str | No | New content (triggers re-embedding) |
| `tier` | str | No | New tier |
| `importance_score` | float | No | New importance (0-1) |
| `memory_nature` | str | No | New nature |
| `metadata` | dict | No | Replaces existing metadata |

Returns: Updated memory dict

Example:

Python
from datetime import datetime, timezone

# Promote a memory to long-term
updated = memory_client.memories.update(
    "550e8400-e29b-41d4-a716-446655440000",
    tier="long",
    importance_score=0.9,
    metadata={
        "verified": True,
        # Aware UTC timestamp; naive datetime.utcnow() is deprecated.
        "promoted_at": datetime.now(timezone.utc).isoformat()
    }
)

print(f"Updated tier: {updated['tier']}")
print(f"New importance: {updated['importance_score']}")

# Update content (triggers re-embedding)
updated = memory_client.memories.update(
    "550e8400-e29b-41d4-a716-446655440000",
    content="User is a senior Python developer specializing in data engineering at DataCorp"
)

memories.delete()

Permanently delete a memory.

Python
def delete(self, memory_id: str) -> Dict[str, bool]:
    ...

Parameters:

ParameterTypeRequiredDescription
memory_idstrYesMemory UUID

Returns: Dict with deleted: True

Example:

Python
try:
    result = memory_client.memories.delete("550e8400-e29b-41d4-a716-446655440000")
    if result["deleted"]:
        print("Memory deleted successfully")
except MemoryOSError as e:
    print(f"Failed to delete: {e.message}")

search()

Perform semantic search across memories.

Python
def search(
    self,
    query: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    threshold: float = 0.7,
    limit: int = 20,
    tier: Optional[Literal["short", "medium", "long"]] = None,
    memory_nature: Optional[Literal["episodic", "semantic"]] = None,
    tags: Optional[List[str]] = None,
    entities: Optional[List[str]] = None
) -> Dict[str, Any]:
    ...

Parameters:

| Parameter | Type | Required | Default | Description |
|---|---|---|---|---|
| `query` | str | No* | None | Natural language query |
| `embedding` | list | No* | None | Pre-computed embedding (1536 dims) |
| `threshold` | float | No | 0.7 | Minimum similarity (0-1) |
| `limit` | int | No | 20 | Max results (max 100) |
| `tier` | str | No | None | Filter by tier |
| `memory_nature` | str | No | None | Filter by nature |
| `tags` | list | No | None | Filter by tags |
| `entities` | list | No | None | Filter by entity IDs |

*Either query or embedding is required.

Returns: Dict with results, search_type, and threshold

Example:

Python
# Basic semantic search
results = memory_client.search(
    query="What programming languages does the user know?",
    threshold=0.7,
    limit=10
)

print(f"Found {len(results['results'])} results")
for result in results["results"]:
    print(f"[{result['combined_score']:.2f}] {result['content']}")

# Filtered search
long_term_results = memory_client.search(
    query="user preferences",
    tier="long",
    memory_nature="semantic",
    threshold=0.6,
    limit=20
)

# Search with pre-computed embedding
import openai

client = openai.OpenAI()
response = client.embeddings.create(
    model="text-embedding-3-small",
    input="user preferences"
)
embedding = response.data[0].embedding

results = memory_client.search(
    embedding=embedding,
    threshold=0.75
)

get_context()

Retrieve formatted context for LLM prompt injection.

Python
def get_context(
    self,
    query: str,
    max_tokens: int = 4000,
    tier: Optional[Literal["short", "medium", "long"]] = None,
    format: Literal["text", "json"] = "text"
) -> Dict[str, Any]:
    ...

Parameters:

| Parameter | Type | Required | Default | Description |
|---|---|---|---|---|
| `query` | str | Yes | - | Context query |
| `max_tokens` | int | No | 4000 | Token budget |
| `tier` | str | No | None | Filter by tier |
| `format` | str | No | `"text"` | Output format |

Returns: Dict with context, memories, token_count, retrieval_time_ms

Example:

Python
# Get context for LLM prompt
context = memory_client.get_context(
    query="User is asking about Python best practices",
    max_tokens=2000
)

print(f"Retrieved {len(context['memories'])} memories")
print(f"Token count: {context['token_count']}")
print(f"Retrieval time: {context['retrieval_time_ms']}ms")

# Use in LLM prompt
system_prompt = f"""You are a helpful assistant. Here is context about the user:

{context['context']}

Use this context to personalize your response."""

# Get tiered context
short_context = memory_client.get_context(
    query="current conversation",
    tier="short",
    max_tokens=500
)

long_context = memory_client.get_context(
    query="user preferences and background",
    tier="long",
    max_tokens=1500
)

combined_prompt = f"""# Long-term Knowledge
{long_context['context']}

# Current Session
{short_context['context']}"""

Async Support

The SDK includes an async client for use with asyncio.

Python
import asyncio
import os

from memoryos import AsyncMemoryOS


async def main():
    """Exercise the async client: create, search, and fetch context."""
    memory = AsyncMemoryOS(api_key=os.environ["MEMORY_OS_API_KEY"])

    try:
        # All methods are async
        new_memory = await memory.memories.create(
            content="User prefers async Python",
            tier="long",
            content_type="fact"
        )

        results = await memory.search(
            query="user preferences",
            threshold=0.7
        )

        context = await memory.get_context(
            query="Help the user",
            max_tokens=2000
        )
    finally:
        # Always release the underlying HTTP session, even on failure.
        await memory.close()

asyncio.run(main())

Async Context Manager

Python
import asyncio
import os

from memoryos import AsyncMemoryOS


async def main():
    """Create a memory using the async client as a context manager."""
    # The client is closed automatically when the context exits.
    async with AsyncMemoryOS(api_key=os.environ["MEMORY_OS_API_KEY"]) as memory:
        result = await memory.memories.create(
            content="Test memory",
            tier="short"
        )
        print(f"Created: {result['id']}")

asyncio.run(main())

Parallel Operations

Python
import asyncio
from memoryos import AsyncMemoryOS

async def parallel_operations():
    async with AsyncMemoryOS(api_key=os.environ["MEMORY_OS_API_KEY"]) as memory:
        # Execute multiple operations in parallel
        context, search_results, memories = await asyncio.gather(
            memory.get_context(query="user preferences", max_tokens=1000),
            memory.search(query="recent interactions", tier="short"),
            memory.memories.list(tier="long", limit=10)
        )

        print(f"Context tokens: {context['token_count']}")
        print(f"Search results: {len(search_results['results'])}")
        print(f"Long-term memories: {len(memories['data'])}")

asyncio.run(parallel_operations())

Type Hints

The SDK includes comprehensive type hints for better IDE support and type checking.

Python
from typing import Optional, Dict, Any, List, Literal
from memoryos import MemoryOS
from memoryos.types import (
    Memory,
    SearchResult,
    ContextResponse,
    Tier,
    MemoryNature,
    ContentType
)

# Type aliases
Tier = Literal["short", "medium", "long"]
MemoryNature = Literal["episodic", "semantic"]
ContentType = Literal["text", "conversation", "document", "event", "fact"]

# Typed function example
def store_user_preference(
    client: MemoryOS,
    user_id: str,
    preference: str
) -> Memory:
    """Persist *preference* as a long-term semantic fact tagged with user_id."""
    memory_fields = {
        "content": preference,
        "tier": "long",
        "content_type": "fact",
        "memory_nature": "semantic",
        "importance_score": 0.8,
        "metadata": {"user_id": user_id},
    }
    return client.memories.create(**memory_fields)


def get_user_context(
    client: MemoryOS,
    user_id: str,
    query: str
) -> ContextResponse:
    """Fetch up to 2000 tokens of formatted context for *query*.

    NOTE(review): user_id is accepted but never used — get_context exposes
    no per-user filter in this SDK; confirm whether scoping was intended.
    """
    return client.get_context(
        query=query,
        max_tokens=2000
    )

Error Handling

The SDK raises MemoryOSError for API errors.

Python
from memoryos.exceptions import MemoryOSError

class MemoryOSError(Exception):
    code: str           # Error code
    message: str        # Error message
    status: int         # HTTP status code
    request_id: str     # Request ID for debugging
    retry_after: int    # Seconds to wait (for rate limits)

Error Codes

| Code | HTTP Status | Description |
|---|---|---|
| VALIDATION_ERROR | 400 | Invalid request parameters |
| AUTHENTICATION_ERROR | 401 | Invalid or missing API key |
| FORBIDDEN | 403 | Insufficient permissions |
| NOT_FOUND | 404 | Resource not found |
| RATE_LIMIT_EXCEEDED | 429 | Too many requests |
| INTERNAL_ERROR | 500 | Server error |

Error Handling Example

Python
import os
import time

from memoryos import MemoryOS
from memoryos.exceptions import MemoryOSError

memory = MemoryOS(api_key=os.environ["MEMORY_OS_API_KEY"])


def safe_operation():
    """Create a memory, translating MemoryOS API errors into local handling.

    Returns:
        The created memory dict, or None when the resource is missing.

    Raises:
        ValueError: On validation failures.
        RuntimeError: On authentication failures.
        MemoryOSError: For any other unhandled API error.
    """
    # Loop instead of recursing on rate limits so sustained throttling
    # cannot exhaust the call stack.
    while True:
        try:
            return memory.memories.create(
                content="User data",
                tier="long"
            )
        except MemoryOSError as e:
            if e.code == "VALIDATION_ERROR":
                print(f"Invalid input: {e.message}")
                raise ValueError(e.message)

            elif e.code == "AUTHENTICATION_ERROR":
                print("Check your API key")
                raise RuntimeError("Authentication failed")

            elif e.code == "RATE_LIMIT_EXCEEDED":
                # Honor the server-provided backoff, then retry.
                print(f"Rate limited. Retry after {e.retry_after}s")
                time.sleep(e.retry_after)
                continue

            elif e.code == "NOT_FOUND":
                print("Resource not found")
                return None

            else:
                print(f"API error: {e.code} - {e.message}")
                raise

        except Exception as e:
            print(f"Unexpected error: {e}")
            raise

Retry with Exponential Backoff

Python
import time
from typing import TypeVar, Callable
from memoryos.exceptions import MemoryOSError

T = TypeVar("T")


def with_retry(
    fn: Callable[[], T],
    max_retries: int = 3,
    base_delay: float = 1.0
) -> T:
    """Execute *fn*, retrying transient MemoryOS errors.

    Args:
        fn: Zero-argument callable to execute.
        max_retries: Total number of attempts (must be >= 1).
        base_delay: Initial backoff in seconds; doubles each retry.

    Returns:
        Whatever *fn* returns.

    Raises:
        ValueError: If max_retries < 1.
        MemoryOSError: The last error when every attempt fails, or
            immediately for non-retryable error codes.
    """
    if max_retries < 1:
        # Guard: with zero attempts the original fell through to
        # `raise last_error` with last_error still None (a TypeError).
        raise ValueError("max_retries must be at least 1")

    last_error = None

    for attempt in range(max_retries):
        try:
            return fn()
        except MemoryOSError as e:
            last_error = e

            # Client-side errors will not succeed on retry.
            if e.code in ["VALIDATION_ERROR", "AUTHENTICATION_ERROR", "FORBIDDEN"]:
                raise

            # Rate limits carry an explicit server-provided wait.
            if e.code == "RATE_LIMIT_EXCEEDED" and e.retry_after:
                time.sleep(e.retry_after)
                continue

            # Exponential backoff for other (likely transient) errors.
            if attempt < max_retries - 1:
                delay = base_delay * (2 ** attempt)
                print(f"Attempt {attempt + 1} failed, retrying in {delay}s")
                time.sleep(delay)

    raise last_error


# Usage
result = with_retry(
    lambda: memory.memories.create(
        content="Important data",
        tier="long"
    )
)

Complete Examples

Chat Application with Memory

Python
import os
from memoryos import MemoryOS
from openai import OpenAI

memory = MemoryOS(api_key=os.environ["MEMORY_OS_API_KEY"])
openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])


def chat(user_id: str, message: str) -> str:
    """Answer *message* with memory-augmented GPT-4 and record the exchange.

    Pulls relevant context from Memory OS, injects it into the system
    prompt, then stores the completed turn as a short-term episodic
    memory tagged with the user's id.
    """
    # Retrieve memories relevant to the incoming message.
    relevant = memory.get_context(
        query=message,
        max_tokens=2000
    )
    system_content = f"You are a helpful assistant. Context:\n\n{relevant['context']}"

    # Generate the assistant's reply.
    completion = openai_client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": message}
        ]
    )
    answer = completion.choices[0].message.content

    # Persist this turn so future calls can recall it.
    memory.memories.create(
        content=f"User: {message}\nAssistant: {answer}",
        tier="short",
        content_type="conversation",
        memory_nature="episodic",
        metadata={"user_id": user_id}
    )

    return answer


# Usage
response = chat("user_123", "What's the best way to learn Python?")
print(response)

FastAPI Integration

Python
from fastapi import FastAPI, HTTPException, Header
from pydantic import BaseModel
from memoryos import MemoryOS
from memoryos.exceptions import MemoryOSError
import os

app = FastAPI()
memory = MemoryOS(api_key=os.environ["MEMORY_OS_API_KEY"])


class CreateMemoryRequest(BaseModel):
    """Request body for POST /memories."""

    content: str                 # memory text to store
    tier: str = "short"          # "short", "medium", or "long"
    content_type: str = "text"   # see ContentType values in the SDK
    metadata: dict = {}          # NOTE(review): pydantic copies mutable defaults per instance — confirm version behavior


class SearchRequest(BaseModel):
    """Request body for POST /search."""

    query: str                # natural-language search query
    threshold: float = 0.7    # minimum similarity score (0-1)
    limit: int = 20           # maximum number of results


@app.post("/memories")
async def create_memory(
    request: CreateMemoryRequest,
    x_user_id: str = Header(...)
):
    """Create a memory for the caller identified by the X-User-Id header."""
    # NOTE(review): memory.memories.create is the synchronous client, so this
    # call blocks the event loop; consider AsyncMemoryOS or a worker thread.
    try:
        result = memory.memories.create(
            content=request.content,
            tier=request.tier,
            content_type=request.content_type,
            # Caller-supplied metadata is kept; user_id is stamped on top.
            metadata={**request.metadata, "user_id": x_user_id}
        )
        return {"success": True, "data": result}
    except MemoryOSError as e:
        # Surface the SDK error as the equivalent HTTP response.
        raise HTTPException(status_code=e.status, detail=e.message)


@app.post("/search")
async def search_memories(request: SearchRequest):
    """Run a semantic search and return the raw SDK results."""
    # NOTE(review): synchronous SDK call inside an async handler blocks the
    # event loop; consider AsyncMemoryOS for production traffic.
    try:
        results = memory.search(
            query=request.query,
            threshold=request.threshold,
            limit=request.limit
        )
        return {"success": True, "data": results}
    except MemoryOSError as e:
        # Map SDK errors onto the matching HTTP status.
        raise HTTPException(status_code=e.status, detail=e.message)


@app.get("/context")
async def get_context(query: str, max_tokens: int = 2000):
    """Return formatted LLM context for *query*, capped at max_tokens."""
    # NOTE(review): synchronous SDK call inside an async handler blocks the
    # event loop; consider AsyncMemoryOS for production traffic.
    try:
        context = memory.get_context(
            query=query,
            max_tokens=max_tokens
        )
        return {"success": True, "data": context}
    except MemoryOSError as e:
        # Map SDK errors onto the matching HTTP status.
        raise HTTPException(status_code=e.status, detail=e.message)

Memory Maintenance Script

Python
import os
from datetime import datetime
from memoryos import MemoryOS

memory = MemoryOS(api_key=os.environ["MEMORY_OS_API_KEY"])


def promote_important_memories():
    """Promote frequently accessed short-term memories to medium-term.

    Walks every page of short-term memories (the original inspected only
    the first 100) and promotes any memory accessed more than 5 times,
    nudging its importance score up by 0.1, capped at 1.0.
    """
    # Collect all pages first: promoting changes each memory's tier, which
    # would shift offset-based pagination if we updated while iterating.
    candidates = []
    offset = 0
    limit = 100
    while True:
        page = memory.memories.list(tier="short", limit=limit, offset=offset)
        candidates.extend(page["data"])
        if not page["meta"]["has_more"]:
            break
        offset += limit

    promoted_count = 0
    for mem in candidates:
        if mem["access_count"] > 5:
            memory.memories.update(
                mem["id"],
                tier="medium",
                # Reward frequent access, but keep the score within 0-1.
                importance_score=min(1, (mem.get("importance_score", 0.5) + 0.1))
            )
            promoted_count += 1
            print(f"Promoted {mem['id']} to medium-term")

    print(f"Promoted {promoted_count} memories")


def cleanup_low_relevance_memories():
    """Delete low-relevance, never-accessed memories.

    Only inspects the first page (up to 100) of short-term memories;
    run repeatedly to drain a larger backlog.
    """
    result = memory.memories.list(tier="short", limit=100)

    deleted_count = 0
    for mem in result["data"]:
        # NOTE(review): assumes every memory dict carries "relevance_score"
        # and "access_count" — confirm against the API response schema.
        if mem["relevance_score"] < 0.1 and mem["access_count"] == 0:
            memory.memories.delete(mem["id"])
            deleted_count += 1
            print(f"Deleted low-relevance memory {mem['id']}")

    print(f"Deleted {deleted_count} memories")


def report_memory_stats():
    """Print a per-tier count summary of stored memories."""
    # limit=1 keeps each payload tiny; only meta.total is needed.
    tier_names = ("short", "medium", "long")
    stats = {
        tier: memory.memories.list(tier=tier, limit=1)["meta"]["total"]
        for tier in tier_names
    }

    print("\n=== Memory Statistics ===")
    print(f"Short-term: {stats['short']}")
    print(f"Medium-term: {stats['medium']}")
    print(f"Long-term: {stats['long']}")
    print(f"Total: {sum(stats.values())}")


if __name__ == "__main__":
    print("Running memory maintenance...")
    promote_important_memories()
    cleanup_low_relevance_memories()
    report_memory_stats()

Ctrl+Shift+C to copy