Sanitization Reference
Sanitization Reference
Overview
Sanitizers automatically transform data before it’s stored in the database. Unlike validators (which reject invalid data), sanitizers modify data to conform to expected formats.
Key points:
- Sanitizers run before validators
- Data is transformed, not rejected
- Multiple sanitizers can be chained
- Sanitizers apply on both insert and update
Syntax
The #[sanitizer(...)] attribute adds sanitization rules to fields:
use ic_dbms_api::prelude::*;
/// Example table showing the three argument forms accepted by `#[sanitizer(...)]`.
#[derive(Table, ...)]
#[table = "users"]
pub struct User {
    #[primary_key]
    pub id: Uint32,
    // Unit struct sanitizer (no parameters)
    #[sanitizer(TrimSanitizer)]
    pub name: Text,
    // Tuple struct sanitizer (positional parameter)
    #[sanitizer(RoundToScaleSanitizer(2))]
    pub balance: Decimal,
    // Named fields sanitizer. `age` is an unsigned column, so the unsigned
    // clamp variant is used (`ClampSanitizer` is the signed counterpart).
    #[sanitizer(ClampUnsignedSanitizer, min = 0, max = 120)]
    pub age: Uint8,
}
Built-in Sanitizers
All sanitizers are available in ic_dbms_api::prelude.
String Sanitizers
TrimSanitizer - Remove leading/trailing whitespace
#[sanitizer(TrimSanitizer)]
pub name: Text,
// " Alice " → "Alice"
CollapseWhitespaceSanitizer - Collapse multiple spaces into one
#[sanitizer(CollapseWhitespaceSanitizer)]
pub description: Text,
// "Hello    World" → "Hello World"
LowerCaseSanitizer - Convert to lowercase
#[sanitizer(LowerCaseSanitizer)]
pub email: Text,
// "Alice@Example.COM" → "alice@example.com"
UpperCaseSanitizer - Convert to uppercase
#[sanitizer(UpperCaseSanitizer)]
pub country_code: Text,
// "us" → "US"
SlugSanitizer - Convert to URL-safe slug
#[sanitizer(SlugSanitizer)]
pub slug: Text,
// "Hello World! This is a Test" → "hello-world-this-is-a-test"
UrlEncodingSanitizer - URL encode special characters
#[sanitizer(UrlEncodingSanitizer)]
pub path: Text,
// "hello world" → "hello%20world"
Numeric Sanitizers
RoundToScaleSanitizer - Round decimal to specific precision
#[sanitizer(RoundToScaleSanitizer(2))]
pub price: Decimal,
// 19.999 → 20.00
// 19.994 → 19.99
ClampSanitizer - Clamp value to range (signed)
#[sanitizer(ClampSanitizer, min = -100, max = 100)]
pub temperature: Int32,
// 150 → 100
// -150 → -100
ClampUnsignedSanitizer - Clamp value to range (unsigned)
#[sanitizer(ClampUnsignedSanitizer, min = 0, max = 100)]
pub percentage: Uint8,
// 150 → 100
// 0 → 0
DateTime Sanitizers
TimezoneSanitizer - Convert to specific timezone
#[sanitizer(TimezoneSanitizer("America/New_York"))]
pub local_time: DateTime,
UtcSanitizer - Convert to UTC
#[sanitizer(UtcSanitizer)]
pub timestamp: DateTime,
// Any timezone → UTC
Null Sanitizers
NullIfEmptySanitizer - Convert empty strings to null
#[sanitizer(NullIfEmptySanitizer)]
pub bio: Nullable<Text>,
// "" → Null
// "Hello" → "Hello"
Implementing Custom Sanitizers
Create a struct implementing the Sanitize trait:
use ic_dbms_api::prelude::{Sanitize, Value, IcDbmsResult};
/// Converts text to title case: every whitespace-separated word gets an
/// uppercase first character and a lowercase remainder.
///
/// Words are re-joined with a single space, so leading, trailing, and repeated
/// whitespace in the input is not preserved. Non-text values pass through
/// unchanged.
pub struct TitleCaseSanitizer;

impl TitleCaseSanitizer {
    /// Uppercases the first character of `word` and lowercases the rest.
    /// An empty `word` yields an empty string.
    fn capitalize(word: &str) -> String {
        let mut rest = word.chars();
        match rest.next() {
            Some(first) => {
                // `to_uppercase` may expand to several characters, hence the collect.
                let mut out: String = first.to_uppercase().collect();
                out.push_str(&rest.as_str().to_lowercase());
                out
            }
            None => String::new(),
        }
    }
}

impl Sanitize for TitleCaseSanitizer {
    fn sanitize(&self, value: Value) -> IcDbmsResult<Value> {
        match value {
            Value::Text(text) => {
                let words: Vec<String> = text
                    .as_str()
                    .split_whitespace()
                    .map(Self::capitalize)
                    .collect();
                Ok(Value::Text(words.join(" ").into()))
            }
            // Anything that is not text is handed back as-is.
            other => Ok(other),
        }
    }
}
// Usage
#[sanitizer(TitleCaseSanitizer)]
pub title: Text,
// "hello world" → "Hello World"
Custom sanitizer with parameters:
/// Truncates text to at most the wrapped number of characters.
///
/// The limit counts `char`s (Unicode scalar values), not bytes, so the cut
/// never lands inside a multi-byte character. Text that is already short
/// enough, and any non-text value, is returned unchanged.
pub struct TruncateSanitizer(pub usize);

impl Sanitize for TruncateSanitizer {
    fn sanitize(&self, value: Value) -> IcDbmsResult<Value> {
        let Self(max_chars) = *self;
        match value {
            Value::Text(text) => {
                let source = text.as_str();
                // Byte offset of the first character past the limit, if there is one.
                let kept = match source.char_indices().nth(max_chars) {
                    Some((cut, _)) => &source[..cut],
                    None => source,
                };
                Ok(Value::Text(kept.to_string().into()))
            }
            other => Ok(other),
        }
    }
}
// Usage
#[sanitizer(TruncateSanitizer(100))]
pub summary: Text,
// "very long text..." → truncated to 100 chars
Custom sanitizer with named parameters:
/// Replaces occurrences of a literal `pattern` in text with `replacement`.
///
/// Non-text values pass through unchanged.
pub struct ReplaceSanitizer {
    /// Literal substring to search for.
    pub pattern: &'static str,
    /// Text substituted for each match.
    pub replacement: &'static str,
}

impl Sanitize for ReplaceSanitizer {
    fn sanitize(&self, value: Value) -> IcDbmsResult<Value> {
        // Both fields are `&'static str`, so destructuring copies them out.
        let Self {
            pattern,
            replacement,
        } = *self;
        let sanitized = match value {
            Value::Text(text) => Value::Text(text.as_str().replace(pattern, replacement).into()),
            other => other,
        };
        Ok(sanitized)
    }
}
// Usage
#[sanitizer(ReplaceSanitizer, pattern = "\n", replacement = " ")]
pub single_line: Text,
Sanitization Order
When multiple sanitizers are applied, they run in declaration order:
/// Example table whose `email` field chains three sanitizers.
#[derive(Table, ...)]
#[table = "users"]
pub struct User {
#[primary_key]
pub id: Uint32,
// Order matters! Sanitizers run top to bottom, in declaration order,
// each one receiving the output of the previous one.
#[sanitizer(TrimSanitizer)] // 1. Trim whitespace
#[sanitizer(CollapseWhitespaceSanitizer)] // 2. Collapse spaces
#[sanitizer(LowerCaseSanitizer)] // 3. Lowercase
pub email: Text,
}
// Input: " Alice@Example.COM "
// After TrimSanitizer: "Alice@Example.COM"
// After CollapseWhitespaceSanitizer: "Alice@Example.COM" (no change)
// After LowerCaseSanitizer: "alice@example.com"
Sanitizers run before validators:
#[sanitizer(TrimSanitizer)] // 1. Trim
#[validate(MaxStrlenValidator(100))] // 2. Validate length (after trim)
pub name: Text,
Examples
User profile sanitization:
/// User profile table: free-form text fields are normalized before storage.
#[derive(Debug, Table, CandidType, Deserialize, Clone, PartialEq, Eq)]
#[table = "users"]
pub struct User {
#[primary_key]
pub id: Uint32,
// Clean up name: trim the ends, then collapse repeated spaces
#[sanitizer(TrimSanitizer)]
#[sanitizer(CollapseWhitespaceSanitizer)]
pub name: Text,
// Normalize email: trim the ends, then lowercase
#[sanitizer(TrimSanitizer)]
#[sanitizer(LowerCaseSanitizer)]
pub email: Text,
// Convert empty to null
#[sanitizer(NullIfEmptySanitizer)]
pub bio: Nullable<Text>,
// Uppercase country code
#[sanitizer(UpperCaseSanitizer)]
pub country: Nullable<Text>,
}
Financial data sanitization:
/// Financial transaction table: amounts, fees, and timestamps are normalized before storage.
#[derive(Debug, Table, CandidType, Deserialize, Clone, PartialEq, Eq)]
#[table = "transactions"]
pub struct Transaction {
#[primary_key]
pub id: Uuid,
// Round to cents
#[sanitizer(RoundToScaleSanitizer(2))]
pub amount: Decimal,
// Cap the fee at 1,000,000 (the column is unsigned, so it can never be
// negative; the clamp only matters at the upper bound)
#[sanitizer(ClampUnsignedSanitizer, min = 0, max = 1000000)]
pub fee: Uint32,
// Always store in UTC
#[sanitizer(UtcSanitizer)]
pub timestamp: DateTime,
}
Content sanitization:
/// Article table: title, slug, body, and summary are cleaned up before storage.
#[derive(Debug, Table, CandidType, Deserialize, Clone, PartialEq, Eq)]
#[table = "articles"]
pub struct Article {
#[primary_key]
pub id: Uuid,
// Clean title
#[sanitizer(TrimSanitizer)]
#[sanitizer(CollapseWhitespaceSanitizer)]
pub title: Text,
// Generate URL-safe slug
#[sanitizer(SlugSanitizer)]
pub slug: Text,
// Clean up content
#[sanitizer(TrimSanitizer)]
pub content: Text,
// Optional summary: trim first so a whitespace-only value becomes ""
// and is then turned into null by the next sanitizer
#[sanitizer(TrimSanitizer)]
#[sanitizer(NullIfEmptySanitizer)]
pub summary: Nullable<Text>,
}
Combined sanitization and validation:
/// Product table combining sanitizers with validators; sanitizers run first,
/// so validators check the already-sanitized value.
#[derive(Debug, Table, CandidType, Deserialize, Clone, PartialEq, Eq)]
#[table = "products"]
pub struct Product {
#[primary_key]
pub id: Uuid,
// Sanitize then validate: the length check applies to the cleaned-up name
#[sanitizer(TrimSanitizer)]
#[sanitizer(CollapseWhitespaceSanitizer)]
#[validate(RangeStrlenValidator(1, 200))]
pub name: Text,
// Sanitize price to 2 decimals, no validation needed
#[sanitizer(RoundToScaleSanitizer(2))]
pub price: Decimal,
// Create slug and validate format
#[sanitizer(SlugSanitizer)]
#[validate(KebabCaseValidator)]
#[validate(MaxStrlenValidator(100))]
pub slug: Text,
// Clean URL and validate format
#[sanitizer(TrimSanitizer)]
#[validate(UrlValidator)]
pub image_url: Nullable<Text>,
}