hotshot_query_service/data_source/
storage.rs

1// Copyright (c) 2022 Espresso Systems (espressosys.com)
2// This file is part of the HotShot Query Service library.
3//
4// This program is free software: you can redistribute it and/or modify it under the terms of the GNU
5// General Public License as published by the Free Software Foundation, either version 3 of the
6// License, or (at your option) any later version.
7// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
8// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9// General Public License for more details.
10// You should have received a copy of the GNU General Public License along with this program. If not,
11// see <https://www.gnu.org/licenses/>.
12
13//! Persistent storage for data sources.
14//!
15//! Naturally, an archival query service such as this is heavily dependent on a persistent storage
16//! implementation. This module defines the interfaces required of this storage. Any storage layer
17//! implementing the appropriate interfaces can be used as the storage layer when constructing a
18//! [`FetchingDataSource`](super::FetchingDataSource), which can in turn be used to instantiate the
19//! REST APIs provided by this crate.
20//!
21//! This module also comes with a few pre-built persistence implementations:
22//! * [`SqlStorage`]
23//! * [`FileSystemStorage`]
24//!
25//! # Storage Traits vs Data Source Traits
26//!
27//! Many of the traits defined in this module (e.g. [`NodeStorage`], [`ExplorerStorage`], and
28//! others) are nearly identical to the corresponding data source traits (e.g.
29//! [`NodeDataSource`](crate::node::NodeDataSource),
30//! [`ExplorerDataSource`](crate::explorer::ExplorerDataSource), etc). They typically differ in
31//! mutability: the storage traits are intended to be implemented on storage
32//! [transactions](super::Transaction), and because even reading may update the internal
33//! state of a transaction, such as a buffer or database cursor, these traits typically take `&mut
34//! self`. This is not a barrier for concurrency since there may be many transactions open
35//! simultaneously from a single data source. The data source traits, meanwhile, are implemented on
36//! the data source itself. Internally, they usually open a fresh transaction and do all their work
37//! on the transaction, not modifying the data source itself, so they take `&self`.
38//!
39//! For traits that differ _only_ in the mutability of the `self` parameter, it is almost possible
40//! to combine them into a single trait whose methods take `self` by value, and implementing said
41//! traits for the reference types `&SomeDataSource` and `&mut SomeDataSourceTransaction`. There are
//! two problems with this approach, which lead us to prefer the slight redundancy of having
43//! separate versions of the traits with mutable and immutable methods:
44//! * The trait bounds quickly get out of hand, since we now have trait bounds not only on the type
45//!   itself, but also on references to that type, and the reference also requires the introduction
46//!   of an additional lifetime parameter.
47//! * We run into a longstanding [`rustc` bug](https://github.com/rust-lang/rust/issues/85063) in
48//!   which type inference diverges when given trait bounds on reference types, even when
49//!   theoretically the types are uniquely inferable. This issue can be worked around by [explicitly
50//!   specifying type parameters at every call site](https://users.rust-lang.org/t/type-recursion-when-trait-bound-is-added-on-reference-type/74525/2),
51//!   but this further exacerbates the ergonomic issues with this approach, past the point of
52//!   viability.
53//!
54//! Occasionally, there may be further differences between the data source traits and corresponding
55//! storage traits. For example, [`AvailabilityStorage`] also differs from
56//! [`AvailabilityDataSource`](crate::availability::AvailabilityDataSource) in fallibility.
57//!
58
59use std::ops::RangeBounds;
60
61use alloy::primitives::map::HashMap;
62use async_trait::async_trait;
63use futures::future::Future;
64use hotshot_types::{
65    data::VidShare, simple_certificate::CertificatePair, traits::node_implementation::NodeType,
66};
67use jf_merkle_tree_compat::prelude::MerkleProof;
68use tagged_base64::TaggedBase64;
69
70use crate::{
71    Header, Payload, QueryResult, Transaction,
72    availability::{
73        BlockId, BlockQueryData, LeafId, LeafQueryData, NamespaceId, PayloadMetadata,
74        PayloadQueryData, QueryableHeader, QueryablePayload, TransactionHash, VidCommonMetadata,
75        VidCommonQueryData,
76    },
77    explorer::{
78        query_data::{
79            BlockDetail, BlockIdentifier, BlockSummary, ExplorerSummary, GetBlockDetailError,
80            GetBlockSummariesError, GetBlockSummariesRequest, GetExplorerSummaryError,
81            GetSearchResultsError, GetTransactionDetailError, GetTransactionSummariesError,
82            GetTransactionSummariesRequest, SearchResult, TransactionDetailResponse,
83            TransactionIdentifier, TransactionSummary,
84        },
85        traits::{ExplorerHeader, ExplorerTransaction},
86    },
87    merklized_state::{MerklizedState, Snapshot},
88    node::{SyncStatusQueryData, TimeWindowQueryData, WindowStart},
89    types::HeightIndexed,
90};
91
92pub mod fail_storage;
93pub mod fs;
94mod ledger_log;
95pub mod pruning;
96pub mod sql;
97
98#[cfg(any(test, feature = "testing"))]
99pub use fail_storage::FailStorage;
100#[cfg(feature = "file-system-data-source")]
101pub use fs::FileSystemStorage;
102#[cfg(feature = "sql-data-source")]
103pub use sql::{SqlStorage, StorageConnectionType};
104
105/// Persistent storage for a HotShot blockchain.
106///
107/// This trait defines the interface which must be provided by the storage layer in order to
108/// implement an availability data source. It is very similar to
109/// [`AvailabilityDataSource`](crate::availability::AvailabilityDataSource) with every occurrence of
110/// [`Fetch`](crate::availability::Fetch) replaced by [`QueryResult`]. This is not a coincidence.
111/// The purpose of the storage layer is to provide all of the functionality of the data source
112/// layer, but independent of an external fetcher for missing data. Thus, when the storage layer
113/// encounters missing, corrupt, or inaccessible data, it simply gives up and replaces the missing
114/// data with [`Err`], rather than creating an asynchronous fetch request to retrieve the missing
115/// data.
116///
117/// Rust gives us ways to abstract and deduplicate these two similar APIs, but they do not lead to a
118/// better interface.
119#[async_trait]
120pub trait AvailabilityStorage<Types>: Send + Sync
121where
122    Types: NodeType,
123    Header<Types>: QueryableHeader<Types>,
124    Payload<Types>: QueryablePayload<Types>,
125{
126    async fn get_leaf(&mut self, id: LeafId<Types>) -> QueryResult<LeafQueryData<Types>>;
127    async fn get_block(&mut self, id: BlockId<Types>) -> QueryResult<BlockQueryData<Types>>;
128    async fn get_header(&mut self, id: BlockId<Types>) -> QueryResult<Header<Types>>;
129    async fn get_payload(&mut self, id: BlockId<Types>) -> QueryResult<PayloadQueryData<Types>>;
130    async fn get_payload_metadata(
131        &mut self,
132        id: BlockId<Types>,
133    ) -> QueryResult<PayloadMetadata<Types>>;
134    async fn get_vid_common(
135        &mut self,
136        id: BlockId<Types>,
137    ) -> QueryResult<VidCommonQueryData<Types>>;
138    async fn get_vid_common_metadata(
139        &mut self,
140        id: BlockId<Types>,
141    ) -> QueryResult<VidCommonMetadata<Types>>;
142
143    async fn get_leaf_range<R>(
144        &mut self,
145        range: R,
146    ) -> QueryResult<Vec<QueryResult<LeafQueryData<Types>>>>
147    where
148        R: RangeBounds<usize> + Send + 'static;
149    async fn get_block_range<R>(
150        &mut self,
151        range: R,
152    ) -> QueryResult<Vec<QueryResult<BlockQueryData<Types>>>>
153    where
154        R: RangeBounds<usize> + Send + 'static;
155
156    async fn get_header_range<R>(
157        &mut self,
158        range: R,
159    ) -> QueryResult<Vec<QueryResult<Header<Types>>>>
160    where
161        R: RangeBounds<usize> + Send + 'static,
162    {
163        let blocks = self.get_block_range(range).await?;
164        Ok(blocks
165            .into_iter()
166            .map(|block| block.map(|block| block.header))
167            .collect())
168    }
169    async fn get_payload_range<R>(
170        &mut self,
171        range: R,
172    ) -> QueryResult<Vec<QueryResult<PayloadQueryData<Types>>>>
173    where
174        R: RangeBounds<usize> + Send + 'static;
175    async fn get_payload_metadata_range<R>(
176        &mut self,
177        range: R,
178    ) -> QueryResult<Vec<QueryResult<PayloadMetadata<Types>>>>
179    where
180        R: RangeBounds<usize> + Send + 'static;
181    async fn get_vid_common_range<R>(
182        &mut self,
183        range: R,
184    ) -> QueryResult<Vec<QueryResult<VidCommonQueryData<Types>>>>
185    where
186        R: RangeBounds<usize> + Send + 'static;
187    async fn get_vid_common_metadata_range<R>(
188        &mut self,
189        range: R,
190    ) -> QueryResult<Vec<QueryResult<VidCommonMetadata<Types>>>>
191    where
192        R: RangeBounds<usize> + Send + 'static;
193
194    async fn get_block_with_transaction(
195        &mut self,
196        hash: TransactionHash<Types>,
197    ) -> QueryResult<BlockQueryData<Types>>;
198
199    /// Get the first leaf which is available in the database with height >= `from`.
200    async fn first_available_leaf(&mut self, from: u64) -> QueryResult<LeafQueryData<Types>>;
201}
202
203pub trait UpdateAvailabilityStorage<Types>: Send
204where
205    Types: NodeType,
206{
207    fn insert_leaf(
208        &mut self,
209        leaf: &LeafQueryData<Types>,
210    ) -> impl Send + Future<Output = anyhow::Result<()>> {
211        self.insert_leaf_range([leaf])
212    }
213
214    fn insert_leaf_with_qc_chain(
215        &mut self,
216        leaf: &LeafQueryData<Types>,
217        qc_chain: Option<[CertificatePair<Types>; 2]>,
218    ) -> impl Send + Future<Output = anyhow::Result<()>> {
219        async move {
220            self.insert_leaf(leaf).await?;
221            self.insert_qc_chain(leaf.height(), qc_chain).await?;
222            Ok(())
223        }
224    }
225
226    fn insert_block(
227        &mut self,
228        block: &BlockQueryData<Types>,
229    ) -> impl Send + Future<Output = anyhow::Result<()>> {
230        self.insert_block_range([block])
231    }
232
233    fn insert_vid<'a>(
234        &mut self,
235        common: &'a VidCommonQueryData<Types>,
236        share: Option<&'a VidShare>,
237    ) -> impl Send + Future<Output = anyhow::Result<()>> {
238        self.insert_vid_range([(common, share)])
239    }
240
241    fn insert_qc_chain(
242        &mut self,
243        height: u64,
244        qc_chain: Option<[CertificatePair<Types>; 2]>,
245    ) -> impl Send + Future<Output = anyhow::Result<()>>;
246    fn insert_leaf_range<'a>(
247        &mut self,
248        leaves: impl Send + IntoIterator<IntoIter: Send, Item = &'a LeafQueryData<Types>>,
249    ) -> impl Send + Future<Output = anyhow::Result<()>>;
250    fn insert_block_range<'a>(
251        &mut self,
252        blocks: impl Send + IntoIterator<IntoIter: Send, Item = &'a BlockQueryData<Types>>,
253    ) -> impl Send + Future<Output = anyhow::Result<()>>;
254    fn insert_vid_range<'a>(
255        &mut self,
256        vid: impl Send
257        + IntoIterator<
258            IntoIter: Send,
259            Item = (&'a VidCommonQueryData<Types>, Option<&'a VidShare>),
260        >,
261    ) -> impl Send + Future<Output = anyhow::Result<()>>;
262}
263
/// Storage-side interface for node-level queries.
///
/// All methods take `&mut self` and report failures through [`QueryResult`].
#[async_trait]
pub trait NodeStorage<Types>
where
    Types: NodeType,
    Header<Types>: QueryableHeader<Types>,
{
    /// Query the block height recorded in storage.
    async fn block_height(&mut self) -> QueryResult<usize>;
    /// Count the transactions in the given `range`, optionally restricted to `namespace`.
    ///
    /// NOTE(review): `range` is presumably a range of block heights, and `None` presumably means
    /// "all namespaces" — confirm against implementations.
    async fn count_transactions_in_range(
        &mut self,
        range: impl RangeBounds<usize> + Send,
        namespace: Option<NamespaceId<Types>>,
    ) -> QueryResult<usize>;
    /// Query the payload size in the given `range`, optionally restricted to `namespace`.
    ///
    /// NOTE(review): `range` is presumably a range of block heights, and `None` presumably means
    /// "all namespaces" — confirm against implementations.
    async fn payload_size_in_range(
        &mut self,
        range: impl RangeBounds<usize> + Send,
        namespace: Option<NamespaceId<Types>>,
    ) -> QueryResult<usize>;
    /// Look up the VID share for the block identified by `id`.
    ///
    /// `id` may be anything convertible into a [`BlockId`].
    async fn vid_share<ID>(&mut self, id: ID) -> QueryResult<VidShare>
    where
        ID: Into<BlockId<Types>> + Send + Sync;
    /// Query a window of headers beginning at `start`.
    ///
    /// NOTE(review): `end` is presumably the upper bound of the window and `limit` presumably
    /// caps the number of headers returned — confirm against implementations.
    async fn get_header_window(
        &mut self,
        start: impl Into<WindowStart<Types>> + Send + Sync,
        end: u64,
        limit: usize,
    ) -> QueryResult<TimeWindowQueryData<Header<Types>>>;

    /// Query the latest QC chain, a pair of certificates, if there is one.
    async fn latest_qc_chain(&mut self) -> QueryResult<Option<[CertificatePair<Types>; 2]>>;

    /// Search the given range of the database for missing objects.
    async fn sync_status_for_range(
        &mut self,
        from: usize,
        to: usize,
    ) -> QueryResult<SyncStatusQueryData>;
}
300
/// Aggregate statistics, tracked per namespace key.
///
/// This is the value loaded by [`AggregatesStorage::load_prev_aggregate`] and both consumed and
/// returned by [`UpdateAggregatesStorage::update_aggregates`].
#[derive(Clone, Debug, Default)]
pub struct Aggregate<Types: NodeType>
where
    Header<Types>: QueryableHeader<Types>,
{
    /// Height associated with this aggregate.
    // NOTE(review): presumably the height of the last block included in the statistics — confirm.
    pub height: i64,
    /// Transaction counts, keyed by optional namespace.
    // NOTE(review): the `None` key presumably holds the count across all namespaces — confirm.
    pub num_transactions: HashMap<Option<NamespaceId<Types>>, usize>,
    /// Payload sizes, keyed by optional namespace.
    // NOTE(review): the `None` key presumably holds the size across all namespaces — confirm.
    pub payload_size: HashMap<Option<NamespaceId<Types>>, usize>,
}
310
/// Read access to the aggregate statistics kept in storage.
pub trait AggregatesStorage<Types>
where
    Types: NodeType,
    Header<Types>: QueryableHeader<Types>,
{
    /// The block height for which aggregate statistics are currently available.
    fn aggregates_height(&mut self) -> impl Future<Output = anyhow::Result<usize>> + Send;

    /// Load the most recent [`Aggregate`] from storage.
    ///
    /// The result is optional.
    // NOTE(review): `None` presumably means no aggregate has been stored yet — confirm.
    fn load_prev_aggregate(
        &mut self,
    ) -> impl Future<Output = anyhow::Result<Option<Aggregate<Types>>>> + Send;
}
324
/// Write access to the aggregate statistics kept in storage.
pub trait UpdateAggregatesStorage<Types>
where
    Types: NodeType,
    Header<Types>: QueryableHeader<Types>,
{
    /// Update aggregate statistics based on new blocks.
    ///
    /// Takes the current `aggregate` by value together with the payload metadata of the new
    /// `blocks`, and yields an [`Aggregate`] on success.
    // NOTE(review): the returned value is presumably the aggregate after all of `blocks` have been
    // applied — confirm against implementations.
    fn update_aggregates(
        &mut self,
        aggregate: Aggregate<Types>,
        blocks: &[PayloadMetadata<Types>],
    ) -> impl Future<Output = anyhow::Result<Aggregate<Types>>> + Send;
}
337
/// An interface for querying data and statistics from the HotShot blockchain.
///
/// This interface provides methods for querying stored blockchain data for use with a block
/// explorer. It does not provide the same guarantees as the availability data source with data
/// fetching. It is not concerned with being up-to-date or having all of the data required, but
/// rather with providing the requested data as quickly as possible, and in a way that can be
/// easily cached.
///
/// Each method reports failure through its own dedicated error type.
#[async_trait]
pub trait ExplorerStorage<Types>
where
    Types: NodeType,
    Header<Types>: ExplorerHeader<Types> + QueryableHeader<Types>,
    Transaction<Types>: ExplorerTransaction<Types>,
    Payload<Types>: QueryablePayload<Types>,
{
    /// Retrieve the details of a specific block from the blockchain.
    ///
    /// The block is identified by the given [`BlockIdentifier`].
    async fn get_block_detail(
        &mut self,
        request: BlockIdentifier<Types>,
    ) -> Result<BlockDetail<Types>, GetBlockDetailError>;

    /// Retrieve a list of block summaries from the blockchain.
    ///
    /// The list is generated from the given [`GetBlockSummariesRequest`].
    async fn get_block_summaries(
        &mut self,
        request: GetBlockSummariesRequest<Types>,
    ) -> Result<Vec<BlockSummary<Types>>, GetBlockSummariesError>;

    /// Retrieve the details of a specific transaction from the blockchain.
    ///
    /// The transaction is identified by the given [`TransactionIdentifier`].
    async fn get_transaction_detail(
        &mut self,
        request: TransactionIdentifier<Types>,
    ) -> Result<TransactionDetailResponse<Types>, GetTransactionDetailError>;

    /// Retrieve a list of transaction summaries from the blockchain.
    ///
    /// The list is generated from the given [`GetTransactionSummariesRequest`].
    async fn get_transaction_summaries(
        &mut self,
        request: GetTransactionSummariesRequest<Types>,
    ) -> Result<Vec<TransactionSummary<Types>>, GetTransactionSummariesError>;

    /// Retrieve a summary overview of the blockchain.
    ///
    /// This is useful for displaying information that indicates the overall status of the
    /// blockchain.
    async fn get_explorer_summary(
        &mut self,
    ) -> Result<ExplorerSummary<Types>, GetExplorerSummaryError>;

    /// Retrieve the results of a search query against the blockchain.
    ///
    /// The results are generated from the given `query`, which is passed as a [`TaggedBase64`]
    /// value.
    async fn get_search_results(
        &mut self,
        query: TaggedBase64,
    ) -> Result<SearchResult<Types>, GetSearchResultsError>;
}
402
/// Storage interface for querying merklized state.
///
/// This trait defines methods that a data source should implement. It enables retrieval of the
/// membership path for a leaf node, which can be used to reconstruct the Merkle tree state.
///
/// `State` is the [`MerklizedState`] being queried and `ARITY` is the const arity parameter
/// shared by `State`, [`Snapshot`], and the returned [`MerkleProof`].
#[async_trait]
pub trait MerklizedStateStorage<Types, State, const ARITY: usize>
where
    Types: NodeType,
    State: MerklizedState<Types, ARITY>,
{
    /// Retrieve the Merkle path for `key` in the state identified by `snapshot`.
    async fn get_path(
        &mut self,
        snapshot: Snapshot<Types, State, ARITY>,
        key: State::Key,
    ) -> QueryResult<MerkleProof<State::Entry, State::Key, State::T, ARITY>>;
}
417
/// Storage interface for querying the height of the stored merklized state.
#[async_trait]
pub trait MerklizedStateHeightStorage {
    /// Query the last state height recorded in storage.
    // NOTE(review): presumably the height of the most recent block whose merklized state has been
    // stored — confirm against implementations.
    async fn get_last_state_height(&mut self) -> QueryResult<usize>;
}