espresso_types/v0/v0_1/
l1.rs

1#[cfg(feature = "node")]
2use std::time::Instant;
3use std::{num::NonZeroUsize, sync::Arc, time::Duration};
4
5use alloy::primitives::{B256, U256};
6#[cfg(feature = "node")]
7use alloy::{
8    network::Ethereum,
9    providers::{
10        Identity, Provider, RootProvider,
11        fillers::{FillProvider, JoinFill, RecommendedFillers},
12    },
13    transports::http::{Client, Http},
14};
15use alloy_compat::ethers_serde;
16#[cfg(feature = "node")]
17use async_broadcast::{InactiveReceiver, Sender};
18use clap::Parser;
19#[cfg(feature = "node")]
20use derive_more::Deref;
21#[cfg(feature = "node")]
22use hotshot_types::traits::metrics::{Counter, Gauge};
23use hotshot_types::traits::metrics::{Metrics, NoMetrics};
24#[cfg(feature = "node")]
25use lru::LruCache;
26#[cfg(feature = "node")]
27use parking_lot::RwLock;
28use serde::{Deserialize, Serialize};
29#[cfg(feature = "node")]
30use tokio::{
31    sync::{Mutex, Notify},
32    task::JoinHandle,
33};
34use url::Url;
35
36use crate::v0::utils::parse_duration;
37
/// Identifying information about a single L1 block: its number, timestamp and hash.
///
/// `timestamp` and `hash` are (de)serialized through the `ethers_serde` helpers imported from
/// `alloy_compat` instead of the serde implementations of the alloy types themselves.
38#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, Hash, PartialEq, Eq)]
39pub struct L1BlockInfo {
    /// Number of the block.
40    pub number: u64,
    /// Timestamp of the block (units are not established in this file).
41    #[serde(with = "ethers_serde::u256")]
42    pub timestamp: U256,
    /// Hash of the block.
43    #[serde(with = "ethers_serde::b256")]
44    pub hash: B256,
45}
46
/// An [`L1BlockInfo`] bundled with a parent hash.
///
/// The derived `PartialOrd`/`Ord` compare `info` first and `parent_hash` second (field order).
/// `L1BlockInfo` does not derive an ordering in this file, so its `PartialOrd`/`Ord`
/// implementations must be provided elsewhere in the crate.
///
/// `dead_code` is allowed when the `node` feature is off; in this file the type is only
/// referenced by items gated on that feature.
47#[cfg_attr(not(feature = "node"), allow(dead_code))]
48#[derive(Clone, Copy, Debug, PartialOrd, Ord, Hash, PartialEq, Eq)]
49pub(crate) struct L1BlockInfoWithParent {
    /// Information about the block itself.
50    pub(crate) info: L1BlockInfo,
    /// Presumably the hash of the block preceding `info` (inferred from the name; it is
    /// populated outside this file).
51    pub(crate) parent_hash: B256,
52}
53
/// A view of the L1 at one point in time: the number of the head block plus, when available,
/// full information about the latest finalized block.
54#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, Hash, PartialEq, Eq)]
55pub struct L1Snapshot {
56    /// The relevant snapshot of the L1 includes a reference to the current head of the L1 chain.
57    ///
58    /// Note that the L1 head is subject to changing due to a reorg. However, no reorg will change
59    /// the _number_ of this block in the chain: L1 block numbers will always be sequentially
60    /// increasing. Therefore, the sequencer does not have to worry about reorgs invalidating this
61    /// snapshot.
62    pub head: u64,
63
64    /// The snapshot also includes information about the latest finalized L1 block.
65    ///
66    /// Since this block is finalized (i.e. cannot be reorged) we can include specific information
67    /// about the particular block, such as its hash and timestamp.
68    ///
69    /// This block may be `None` in the rare case where Espresso has started shortly after the
70    /// genesis of the L1, and the L1 has yet to finalize a block. In all other cases it will be
71    /// `Some`.
72    pub finalized: Option<L1BlockInfo>,
73}
74
// NOTE: this struct derives `clap::Parser`, which turns the `///` text on the struct and its
// fields into command-line help output. The existing `///` lines are therefore left untouched
// and reviewer notes are written as plain `//` comments.
//
// Every field except `metrics` can be set through a long flag or through the environment
// variable named in its `env` attribute. Fields with `value_parser = parse_duration` are parsed
// by the `parse_duration` helper imported from `crate::v0::utils`.
75/// Configuration for an L1 client.
76#[derive(Clone, Debug, Parser)]
77pub struct L1ClientOptions {
78    /// Delay when retrying failed L1 queries.
79    #[clap(
80        long,
81        env = "ESPRESSO_L1_RETRY_DELAY",
82        default_value = "1s",
83        value_parser = parse_duration,
84    )]
85    pub l1_retry_delay: Duration,
86
87    /// Request rate when polling L1.
88    #[clap(
89        long,
90        env = "ESPRESSO_L1_POLLING_INTERVAL",
91        default_value = "7s",
92        value_parser = parse_duration,
93    )]
94    pub l1_polling_interval: Duration,
95
    // `NonZeroUsize` makes a value of 0 fail at argument-parsing time.
96    /// Maximum number of L1 blocks to keep in cache at once.
97    #[clap(long, env = "ESPRESSO_L1_BLOCKS_CACHE_SIZE", default_value = "100")]
98    pub l1_blocks_cache_size: NonZeroUsize,
99
    // NOTE(review): unlike the cache size above this is a plain `usize`, so 0 is accepted by
    // the parser; confirm that whatever consumes this capacity tolerates 0.
100    /// Number of L1 events to buffer before discarding.
101    #[clap(
102        long,
103        env = "ESPRESSO_L1_EVENTS_CHANNEL_CAPACITY",
104        default_value = "100"
105    )]
106    pub l1_events_channel_capacity: usize,
107
108    /// Maximum number of L1 blocks that can be scanned for events in a single query.
109    #[clap(
110        long,
111        env = "ESPRESSO_L1_EVENTS_MAX_BLOCK_RANGE",
112        default_value = "10000"
113    )]
114    pub l1_events_max_block_range: u64,
115
    // NOTE(review): a bare `long` derives the flag from the field name, so this option is
    // `--subscription-timeout`, without the `l1-` prefix its siblings get, while the
    // environment variable does carry `L1`. Renaming would break existing invocations; confirm
    // the mismatch is intentional.
116    /// Maximum time to wait for new heads before considering a stream invalid and reconnecting.
117    #[clap(
118        long,
119        env = "ESPRESSO_L1_SUBSCRIPTION_TIMEOUT",
120        default_value = "1m",
121        value_parser = parse_duration,
122    )]
123    pub subscription_timeout: Duration,
124
125    /// Fail over to another provider if the current provider fails twice within this window.
126    #[clap(
127        long,
128        env = "ESPRESSO_L1_FREQUENT_FAILURE_TOLERANCE",
129        default_value = "1m",
130        value_parser = parse_duration,
131    )]
132    pub l1_frequent_failure_tolerance: Duration,
133
134    /// Fail over to another provider if the current provider fails many times in a row, within any
135    /// time window.
136    #[clap(
137        long,
138        env = "ESPRESSO_L1_CONSECUTIVE_FAILURE_TOLERANCE",
139        default_value = "10"
140    )]
141    pub l1_consecutive_failure_tolerance: usize,
142
143    /// Revert back to the first provider this duration after failing over.
144    #[clap(
145        long,
146        env = "ESPRESSO_L1_FAILOVER_REVERT",
147        default_value = "30m",
148        value_parser = parse_duration,
149    )]
150    pub l1_failover_revert: Duration,
151
    // No `default_value`: the field is `None` unless the flag or environment variable is set.
152    /// Amount of time to wait after receiving a 429 response before making more L1 RPC requests.
153    ///
154    /// If not set, the general l1-retry-delay will be used.
155    #[clap(
156        long,
157        env = "ESPRESSO_L1_RATE_LIMIT_DELAY",
158        value_parser = parse_duration,
159    )]
160    pub l1_rate_limit_delay: Option<Duration>,
161
    // `value_delimiter = ','` lets several URLs be given as one comma-separated value.
162    /// Separate provider to use for subscription feeds.
163    ///
164    /// Typically this would be a WebSockets endpoint while the main provider uses HTTP.
165    #[clap(long, env = "ESPRESSO_L1_WS_PROVIDER", value_delimiter = ',')]
166    pub l1_ws_provider: Option<Vec<Url>>,
167
    // NOTE(review): the environment variable here starts with `ESPRESSO_NODE_L1_`, whereas
    // every other option in this struct uses `ESPRESSO_L1_`, and the flag has no `l1-` prefix;
    // confirm this is intentional.
168    /// Interval at which the background update loop polls the L1 stake table contract for new events
169    /// and updates local persistence.
170    ///
171    #[clap(
172        long,
173        env = "ESPRESSO_NODE_L1_STAKE_TABLE_UPDATE_INTERVAL",
174        default_value = "60m",
175        value_parser = parse_duration,
176    )]
177    pub stake_table_update_interval: Duration,
178
179    /// Maximum duration to retry fetching L1 events before panicking.
180    ///
181    /// This prevents infinite retries by panicking if the total number of retries exceed the maximum duration.
182    /// This is helpful in cases where the RPC block range limit or the event return limit is hit,
183    /// or if there is an outage. In such cases, panicking ensures that the node operator can take
184    /// action instead of the node getting stuck indefinitely. This is necessary because the stake table is constructed
185    /// from the fetched events, and is required for node to participate in consensus.
186    #[clap(
187        long,
188        env = "ESPRESSO_L1_EVENTS_MAX_RETRY_DURATION",
189        default_value = "20m",
190        value_parser = parse_duration,
191    )]
192    pub l1_events_max_retry_duration: Duration,
193
    // No `default_value`: the margin is `None` (assumption disabled) unless explicitly set.
194    /// A block range which is expected to contain the finalized heads of all L1 provider chains.
195    ///
196    /// If specified, it is assumed that if a block `n` is known to be finalized according to a
197    /// certain provider, then any block less than `n - L1_FINALIZED_SAFETY_MARGIN` is finalized
198    /// _according to any provider_. In other words, if we fail over from one provider to another,
199    /// the second provider will never be lagging the first by more than this margin.
200    ///
201    /// This allows us to quickly query for very old finalized blocks by number. Without this
202    /// assumption, we always need to verify that a block is finalized by fetching all blocks in a
203    /// hash chain between the known finalized block and the desired block, recomputing and checking
204    /// the hashes. This is fine and good for blocks very near the finalized head, but for
205    /// extremely old blocks it is prohibitively expensive, and these old blocks are extremely
206    /// unlikely to be unfinalized anyways.
207    #[clap(long, env = "ESPRESSO_L1_FINALIZED_SAFETY_MARGIN")]
208    pub l1_finalized_safety_margin: Option<u64>,
209
    // Not a command-line option: `skip` excludes the field from argument parsing and
    // initializes it with the given expression, a boxed `NoMetrics` inside an `Arc`.
210    #[clap(skip = Arc::<Box<dyn Metrics>>::new(Box::new(NoMetrics)))]
211    pub metrics: Arc<Box<dyn Metrics>>,
212}
213
214/// Type alias for the alloy provider used to talk to the L1.
///
/// It is a `FillProvider` layered over a `RootProvider`, whose filler stack joins `Identity`
/// with the fillers that `RecommendedFillers` associates with the `Ethereum` network.
215#[cfg(feature = "node")]
216pub type L1Provider = FillProvider<
217    JoinFill<Identity, <Ethereum as RecommendedFillers>::RecommendedFillers>,
218    RootProvider,
219>;
220
221#[cfg(feature = "node")]
222#[derive(Clone, Debug, Deref)]
223/// An Ethereum provider and configuration to interact with the L1.
224///
225/// This client runs asynchronously, updating an in-memory snapshot of the relevant L1 information
226/// each time a new L1 block is published. The main advantage of this is that we can update the L1
227/// state at the pace of the L1, instead of the much faster pace of HotShot consensus. This makes it
228/// easy to use a subscription instead of polling for new blocks, vastly reducing the number of L1
229/// RPC calls we make.
///
/// The derived `Deref` targets the `provider` field (marked `#[deref]`), so methods of
/// [`L1Provider`] can be called directly on an `L1Client`.
230pub struct L1Client {
231    /// The alloy provider used for L1 communication with wallet and default fillers
    ///
    /// NOTE(review): the `L1Provider` alias in this file lists only `Identity` and the
    /// recommended fillers, with no wallet filler; confirm whether "wallet" is still accurate.
232    #[deref]
233    pub provider: L1Provider,
234    /// Actual transport used in `self.provider`
235    /// i.e. the `t` variable in `ProviderBuilder::new().on_client(RpcClient::new(t, is_local))`
236    pub transport: SwitchingTransport,
237    /// Shared state updated by an asynchronous task which polls the L1.
    ///
    /// Guarded by the tokio `Mutex` imported at the top of this file.
238    pub(crate) state: Arc<Mutex<L1State>>,
239    /// Channel used by the async update task to send events to clients.
240    pub(crate) sender: Sender<L1Event>,
241    /// Receiver for events from the async update task.
    ///
    /// Stored as an `InactiveReceiver`, the inactive receiver handle type of `async_broadcast`.
242    pub(crate) receiver: InactiveReceiver<L1Event>,
243    /// Async task which updates the shared state.
244    pub(crate) update_task: Arc<L1UpdateTask>,
245}
246
/// Lets an [`L1Client`] be used wherever an alloy `Provider` is expected, by delegating to the
/// wrapped `provider` field.
247#[cfg(feature = "node")]
248impl Provider for L1Client {
    /// Returns the root provider of the wrapped `provider`.
249    fn root(&self) -> &RootProvider {
250        self.provider.root()
251    }
252}
253
254/// In-memory view of the L1 state, updated asynchronously.
///
/// Held by [`L1Client`] behind an `Arc<Mutex<_>>`.
255#[cfg(feature = "node")]
256#[derive(Debug)]
257pub(crate) struct L1State {
    /// Most recent snapshot of the L1 (head number and latest finalized block).
258    pub(crate) snapshot: L1Snapshot,
    /// LRU cache of blocks with their parent hashes, keyed by a `u64`.
    /// Presumably the key is the block number and all entries are finalized; verify against
    /// the code that fills the cache.
259    pub(crate) finalized: LruCache<u64, L1BlockInfoWithParent>,
    /// Presumably the number of the most recently observed finalized block, `None` until one
    /// has been seen; TODO confirm against the update task.
260    pub(crate) last_finalized: Option<u64>,
261}
262
/// Events carried by the broadcast channel held in [`L1Client`] (`sender` / `receiver`).
263#[cfg(feature = "node")]
264#[derive(Clone, Debug)]
265pub(crate) enum L1Event {
    /// A new head was observed; `head` is a block number, like `L1Snapshot::head` (presumably
    /// the same quantity; confirm at the send site).
266    NewHead { head: u64 },
    /// A new finalized block was observed, reported together with its parent hash.
267    NewFinalized { finalized: L1BlockInfoWithParent },
268}
269
/// Handle slot for the background task referenced by `L1Client::update_task`.
///
/// Wraps an optional tokio `JoinHandle` in a tokio `Mutex`. The derived `Default` yields an
/// empty slot (`None`). When and how the slot is filled or cleared is decided outside this file.
270#[cfg(feature = "node")]
271#[derive(Debug, Default)]
272pub(crate) struct L1UpdateTask(pub(crate) Mutex<Option<JoinHandle<()>>>);
273
/// Metric handles used by the L1 client, built on the `Gauge` and `Counter` traits from
/// `hotshot_types`.
///
/// The per-field descriptions below are inferred from the field names; the code that updates
/// them lives outside this file.
274#[cfg(feature = "node")]
275#[derive(Clone, Debug)]
276pub(crate) struct L1ClientMetrics {
    /// Gauge presumably tracking the current L1 head.
277    pub(crate) head: Arc<dyn Gauge>,
    /// Gauge presumably tracking the latest finalized L1 block.
278    pub(crate) finalized: Arc<dyn Gauge>,
    /// Counter presumably incremented on each reconnect.
279    pub(crate) reconnects: Arc<dyn Counter>,
    /// Counter presumably incremented on each provider failover.
280    pub(crate) failovers: Arc<dyn Counter>,
    /// A list of counters; presumably one failure counter per configured provider. TODO confirm
    /// how the vector is indexed.
281    pub(crate) failures: Arc<Vec<Box<dyn Counter>>>,
282}
283
284/// An RPC client with multiple remote (HTTP) providers.
285///
286/// This client utilizes one RPC provider at a time, but if it detects that the provider is in a
287/// failing state, it will automatically switch to the next provider in its list.
///
/// Every field is either an `Arc` or a struct of `Arc`s, so cloning a `SwitchingTransport`
/// yields another handle to the same underlying state.
288#[cfg(feature = "node")]
289#[derive(Clone, Debug)]
290pub struct SwitchingTransport {
291    /// The transport currently being used by the client
    ///
    /// Guarded by the `parking_lot` `RwLock` imported at the top of this file.
292    pub(crate) current_transport: Arc<RwLock<SingleTransport>>,
293    /// The list of configured HTTP URLs to use for RPC requests
294    pub(crate) urls: Arc<Vec<Url>>,
    /// The client options this transport was configured with.
295    pub(crate) opt: Arc<L1ClientOptions>,
    /// Metric handles associated with this transport.
296    pub(crate) metrics: L1ClientMetrics,
    /// A tokio `Notify`; presumably signalled when the active provider changes (inferred from
    /// the name; confirm at the notification site).
297    pub(crate) switch_notify: Arc<Notify>,
298}
299
300/// The state of the current provider being used by a [`SwitchingTransport`].
301/// This is cloneable and returns a reference to the same underlying data.
///
/// More precisely: clones share `status` through its `Arc`, while `generation` and `revert_at`
/// are copied by value into each clone.
302#[cfg(feature = "node")]
303#[derive(Debug, Clone)]
304pub(crate) struct SingleTransport {
    /// Presumably a counter identifying which provider selection this transport belongs to;
    /// TODO confirm how it is incremented and mapped to a URL.
305    pub(crate) generation: usize,
    /// The HTTP transport used to issue requests.
306    pub(crate) client: Http<Client>,
    /// Failure and rate-limit bookkeeping for this transport, shared between clones.
307    pub(crate) status: Arc<RwLock<SingleTransportStatus>>,
308    /// Time at which to revert back to the primary provider after a failover.
309    pub(crate) revert_at: Option<Instant>,
310}
311
312/// The status of a single transport
///
/// The derived `Default` starts with no recorded failure, zero consecutive failures, no rate
/// limit and `shutting_down == false`.
313#[cfg(feature = "node")]
314#[derive(Debug, Default)]
315pub(crate) struct SingleTransportStatus {
    /// Presumably the time of the most recent failure, `None` if there has been none.
316    pub(crate) last_failure: Option<Instant>,
    /// Presumably the number of failures in a row since the last success; TODO confirm when it
    /// is reset.
317    pub(crate) consecutive_failures: usize,
    /// Presumably the time until which requests are held back after a rate-limit response,
    /// `None` when not rate limited.
318    pub(crate) rate_limited_until: Option<Instant>,
319    /// Whether or not this current transport is being shut down (switching to the next transport)
320    pub(crate) shutting_down: bool,
321}
espresso_types/v0/v0_1/l1.rs

espresso_types/v0/v0_1/
l1.rs