Skip to main content

hotshot_testing/
test_builder.rs

1// Copyright (c) 2021-2024 Espresso Systems (espressosys.com)
2// This file is part of the HotShot repository.
3
4// You should have received a copy of the MIT License
5// along with the HotShot repository. If not, see <https://mit-license.org/>.
6
7use std::{collections::HashMap, num::NonZeroUsize, rc::Rc, sync::Arc, time::Duration};
8
9use hotshot::{
10    HotShotInitializer, SystemContext, TwinsHandlerState,
11    tasks::EventTransformerState,
12    traits::{NetworkReliability, NodeImplementation, TestableNodeImplementation},
13    types::SystemContextHandle,
14};
15use hotshot_example_types::{
16    node_types::TestTypes, state_types::TestInstanceState, storage_types::TestStorage,
17    testable_delay::DelayConfig,
18};
19use hotshot_types::{
20    HotShotConfig, PeerConfig, ValidatorConfig,
21    consensus::ConsensusMetricsValue,
22    epoch_membership::EpochMembershipCoordinator,
23    storage_metrics::StorageMetricsValue,
24    traits::{node_implementation::NodeType, signature_key::StakeTableEntryType},
25};
26use hotshot_utils::anytrace::*;
27use tide_disco::Url;
28use vec1::Vec1;
29use versions::{MIN_SUPPORTED_VERSION, Upgrade};
30
31use super::{
32    completion_task::{CompletionTaskDescription, TimeBasedCompletionTaskDescription},
33    overall_safety_task::OverallSafetyPropertiesDescription,
34    txn_task::TxnTaskDescription,
35};
36use crate::{
37    helpers::{TestNodeKeyMap, key_pair_for_id},
38    node_stake::TestNodeStakes,
39    spinning_task::SpinningTaskDescription,
40    test_launcher::{Network, ResourceGenerators, TestLauncher},
41    test_task::TestTaskStateSeed,
42    view_sync_task::ViewSyncTaskDescription,
43};
44
/// Shared, thread-safe callback used to validate the transactions a test produced.
///
/// The callback receives a list of `(u64, u64)` pairs; the validators built in this file read
/// each pair as `(view, number of transactions)`. It returns `Ok(())` to accept the result and
/// an error to reject it.
pub type TransactionValidator = Arc<dyn Fn(&Vec<(u64, u64)>) -> Result<()> + Send + Sync>;
46
/// Data describing how a round should be timed.
///
/// When a launcher is generated, `next_view_timeout`, `builder_timeout`, `data_request_delay`
/// and `view_sync_timeout` are copied into each node's `HotShotConfig`, while
/// `secondary_network_delay` is handed to the network generator.
#[derive(Clone, Debug, Copy)]
pub struct TimingData {
    /// Base duration for next-view timeout, in milliseconds
    pub next_view_timeout: u64,
    /// The maximum amount of time a leader can wait to get a block from a builder
    pub builder_timeout: Duration,
    /// Time to wait until we request data associated with a proposal
    pub data_request_delay: Duration,
    /// Delay before sending through the secondary network in CombinedNetworks
    pub secondary_network_delay: Duration,
    /// View sync timeout
    pub view_sync_timeout: Duration,
}
61
62pub fn default_hotshot_config<TYPES: NodeType>(
63    known_nodes_with_stake: Vec<PeerConfig<TYPES>>,
64    known_da_nodes: Vec<PeerConfig<TYPES>>,
65    num_bootstrap_nodes: usize,
66    epoch_height: u64,
67    epoch_start_block: u64,
68) -> HotShotConfig<TYPES> {
69    HotShotConfig {
70        start_threshold: (1, 1),
71        num_nodes_with_stake: NonZeroUsize::new(known_nodes_with_stake.len()).unwrap(),
72        known_da_nodes: known_da_nodes.clone(),
73        da_committees: Default::default(),
74        num_bootstrap: num_bootstrap_nodes,
75        known_nodes_with_stake: known_nodes_with_stake.clone(),
76        da_staked_committee_size: known_da_nodes.len(),
77        fixed_leader_for_gpuvid: 1,
78        next_view_timeout: 500,
79        view_sync_timeout: Duration::from_millis(250),
80        builder_timeout: Duration::from_millis(1000),
81        data_request_delay: Duration::from_millis(200),
82        // Placeholder until we spin up the builder
83        builder_urls: vec1::vec1![Url::parse("http://localhost:9999").expect("Valid URL")],
84        start_proposing_view: u64::MAX,
85        stop_proposing_view: 0,
86        start_voting_view: u64::MAX,
87        stop_voting_view: 0,
88        start_proposing_time: u64::MAX,
89        stop_proposing_time: 0,
90        start_voting_time: u64::MAX,
91        stop_voting_time: 0,
92        epoch_height,
93        epoch_start_block,
94        stake_table_capacity: hotshot_types::light_client::DEFAULT_STAKE_TABLE_CAPACITY,
95        drb_difficulty: 10,
96        drb_upgrade_difficulty: 20,
97    }
98}
99
100#[allow(clippy::type_complexity)]
101pub fn gen_node_lists<TYPES: NodeType>(
102    num_staked_nodes: u64,
103    num_da_nodes: u64,
104    node_stakes: &TestNodeStakes,
105) -> (Vec<PeerConfig<TYPES>>, Vec<PeerConfig<TYPES>>) {
106    let mut staked_nodes = Vec::new();
107    let mut da_nodes = Vec::new();
108
109    for n in 0..num_staked_nodes {
110        let validator_config: ValidatorConfig<TYPES> = ValidatorConfig::generated_from_seed_indexed(
111            [0u8; 32],
112            n,
113            node_stakes.get(n),
114            n < num_da_nodes,
115        );
116
117        let peer_config = validator_config.public_config();
118        staked_nodes.push(peer_config.clone());
119
120        if n < num_da_nodes {
121            da_nodes.push(peer_config)
122        }
123    }
124
125    (staked_nodes, da_nodes)
126}
127
/// Metadata describing a test: the network shape, the tasks that drive and check the run,
/// and any per-node behaviour overrides.
#[derive(Clone)]
pub struct TestDescription<TYPES: NodeType, I: NodeImplementation<TYPES>> {
    /// `HotShotConfig` used for setting up the test infrastructure.
    ///
    /// Note: this is not the same as the `HotShotConfig` passed to test nodes for `SystemContext::init`;
    /// those configs are instead provided by the resource generators in the test launcher.
    pub test_config: HotShotConfig<TYPES>,
    /// Whether to skip initializing nodes that will start late, which will catch up later with
    /// `HotShotInitializer::from_reload` in the spinning task.
    pub skip_late: bool,
    /// Overall safety property description
    pub overall_safety_properties: OverallSafetyPropertiesDescription,
    /// Spinning properties
    pub spinning_properties: SpinningTaskDescription,
    /// Transaction submission timing
    pub txn_description: TxnTaskDescription,
    /// Completion task
    pub completion_task_description: CompletionTaskDescription,
    /// Timing data
    pub timing_data: TimingData,
    /// Unreliable networking metadata; `None` in the default description
    pub unreliable_network: Option<Box<dyn NetworkReliability>>,
    /// View sync check task
    pub view_sync_properties: ViewSyncTaskDescription,
    /// Description of builders to run
    pub builders: Vec1<BuilderDescription>,
    /// Description of fallback builder to run
    pub fallback_builder: BuilderDescription,
    /// Description of the solver to run
    pub solver: FakeSolverApiDescription,
    /// Maps a node id to the behaviour (standard or byzantine) that node should exhibit
    pub behaviour: Rc<dyn Fn(u64) -> Behaviour<TYPES, I>>,
    /// Delay config, keyed by node id, used to add delays to asynchronous calls; nodes
    /// without an entry get the default `DelayConfig`
    pub async_delay_config: HashMap<u64, DelayConfig>,
    /// Configured version upgrade
    pub upgrade: versions::Upgrade,
    /// View in which to propose an upgrade
    pub upgrade_view: Option<u64>,
    /// Whether to initialize the solver on startup
    pub start_solver: bool,
    /// Closure used to validate the resulting transactions
    pub validate_transactions: TransactionValidator,
    /// Stake to apply to particular nodes. Nodes not included will have a stake of 1.
    pub node_stakes: TestNodeStakes,
}
174
175pub fn nonempty_block_threshold(threshold: (u64, u64)) -> TransactionValidator {
176    Arc::new(move |transactions| {
177        if matches!(threshold, (0, _)) {
178            return Ok(());
179        }
180
181        let blocks: Vec<_> = transactions.iter().filter(|(view, _)| *view != 0).collect();
182
183        let num_blocks = blocks.len() as u64;
184        let mut num_nonempty_blocks = 0;
185
186        ensure!(num_blocks > 0, "Failed to commit any non-genesis blocks");
187
188        for (_, num_transactions) in blocks {
189            if *num_transactions > 0 {
190                num_nonempty_blocks += 1;
191            }
192        }
193
194        ensure!(
195            // i.e. num_nonempty_blocks / num_blocks >= threshold.0 / threshold.1
196            num_nonempty_blocks * threshold.1 >= threshold.0 * num_blocks,
197            "Failed to meet nonempty block threshold of {}/{}; got {num_nonempty_blocks} nonempty \
198             blocks out of a total of {num_blocks}",
199            threshold.0,
200            threshold.1
201        );
202
203        Ok(())
204    })
205}
206
207pub fn nonempty_block_limit(limit: (u64, u64)) -> TransactionValidator {
208    Arc::new(move |transactions| {
209        if matches!(limit, (_, 0)) {
210            return Ok(());
211        }
212
213        let blocks: Vec<_> = transactions.iter().filter(|(view, _)| *view != 0).collect();
214
215        let num_blocks = blocks.len() as u64;
216        let mut num_nonempty_blocks = 0;
217
218        ensure!(num_blocks > 0, "Failed to commit any non-genesis blocks");
219
220        for (_, num_transactions) in blocks {
221            if *num_transactions > 0 {
222                num_nonempty_blocks += 1;
223            }
224        }
225
226        ensure!(
227            // i.e. num_nonempty_blocks / num_blocks <= limit.0 / limit.1
228            num_nonempty_blocks * limit.1 <= limit.0 * num_blocks,
229            "Exceeded nonempty block limit of {}/{}; got {num_nonempty_blocks} nonempty blocks \
230             out of a total of {num_blocks}",
231            limit.0,
232            limit.1
233        );
234
235        Ok(())
236    })
237}
238
/// How a test node should behave; selects the way `create_test_handle` spawns the node.
#[derive(Debug)]
pub enum Behaviour<TYPES: NodeType, I: NodeImplementation<TYPES>> {
    /// Byzantine "twins" node: spawned through the state's `spawn_twin_handles`, which yields
    /// two handles for the same node id.
    ByzantineTwins(Box<dyn TwinsHandlerState<TYPES, I>>),
    /// Byzantine node: spawned through the event transformer state's `spawn_handle`.
    Byzantine(Box<dyn EventTransformerState<TYPES, I>>),
    /// Regular node: a plain `SystemContext` with its tasks running.
    Standard,
}
245
246pub async fn create_test_handle<
247    TYPES: NodeType<InstanceState = TestInstanceState>,
248    I: NodeImplementation<TYPES>,
249>(
250    metadata: TestDescription<TYPES, I>,
251    node_id: u64,
252    network: Network<TYPES, I>,
253    memberships: Arc<TYPES::Membership>,
254    config: HotShotConfig<TYPES>,
255    storage: I::Storage,
256) -> SystemContextHandle<TYPES, I> {
257    let initializer = HotShotInitializer::<TYPES>::from_genesis(
258        TestInstanceState::new(
259            metadata
260                .async_delay_config
261                .get(&node_id)
262                .cloned()
263                .unwrap_or_default(),
264        ),
265        metadata.test_config.epoch_height,
266        metadata.test_config.epoch_start_block,
267        vec![],
268        metadata.upgrade,
269    )
270    .await
271    .unwrap();
272
273    // See whether or not we should be DA
274    let is_da = node_id < config.da_staked_committee_size as u64;
275
276    let validator_config: ValidatorConfig<TYPES> = ValidatorConfig::generated_from_seed_indexed(
277        [0u8; 32],
278        node_id,
279        metadata.node_stakes.get(node_id),
280        is_da,
281    );
282
283    // Get key pair for certificate aggregation
284    let private_key = validator_config.private_key.clone();
285    let public_key = validator_config.public_key.clone();
286    let state_private_key = validator_config.state_private_key.clone();
287    let membership_coordinator =
288        EpochMembershipCoordinator::new(memberships, config.epoch_height, &storage.clone());
289
290    let behaviour = (metadata.behaviour)(node_id);
291    match behaviour {
292        Behaviour::ByzantineTwins(state) => {
293            let state = Box::leak(state);
294            let (left_handle, _right_handle) = state
295                .spawn_twin_handles(
296                    public_key,
297                    private_key,
298                    state_private_key,
299                    node_id,
300                    config,
301                    metadata.upgrade,
302                    membership_coordinator,
303                    network,
304                    initializer,
305                    ConsensusMetricsValue::default(),
306                    storage,
307                    StorageMetricsValue::default(),
308                )
309                .await;
310
311            left_handle
312        },
313        Behaviour::Byzantine(state) => {
314            let state = Box::leak(state);
315            state
316                .spawn_handle(
317                    public_key,
318                    private_key,
319                    state_private_key,
320                    node_id,
321                    config,
322                    metadata.upgrade,
323                    membership_coordinator,
324                    network,
325                    initializer,
326                    ConsensusMetricsValue::default(),
327                    storage,
328                    StorageMetricsValue::default(),
329                )
330                .await
331        },
332        Behaviour::Standard => {
333            let hotshot = SystemContext::<TYPES, I>::new(
334                public_key,
335                private_key,
336                state_private_key,
337                node_id,
338                config,
339                metadata.upgrade,
340                membership_coordinator,
341                network,
342                initializer,
343                ConsensusMetricsValue::default(),
344                storage,
345                StorageMetricsValue::default(),
346            )
347            .await;
348
349            hotshot.run_tasks().await
350        },
351    }
352}
353
/// Describes a possible change to builder status during test
#[derive(Clone, Debug)]
pub enum BuilderChange {
    /// Builder should start up
    Up,
    /// Builder should shut down completely
    Down,
    /// Toggles whether builder should always respond
    /// to claim calls with errors
    FailClaims(bool),
}
365
/// Metadata describing builder behaviour during a test.
///
/// The `Default` value has no scheduled changes.
#[derive(Clone, Debug, Default)]
pub struct BuilderDescription {
    /// Scheduled status changes: view number -> change to builder status
    pub changes: HashMap<u64, BuilderChange>,
}
372
/// Configuration of the mock solver API used in tests.
#[derive(Clone, Debug)]
pub struct FakeSolverApiDescription {
    /// The rate at which errors occur in the mock solver API.
    ///
    /// NOTE(review): the default test description sets this to `0.1` and calls it a 10% error
    /// rate, so this looks like a fraction in `[0, 1]` rather than a percentage — confirm.
    pub error_pct: f32,
}
378
379impl Default for TimingData {
380    fn default() -> Self {
381        Self {
382            next_view_timeout: 6000,
383            builder_timeout: Duration::from_millis(500),
384            data_request_delay: Duration::from_millis(200),
385            secondary_network_delay: Duration::from_millis(1000),
386            view_sync_timeout: Duration::from_millis(2000),
387        }
388    }
389}
390
391impl<TYPES: NodeType, I: NodeImplementation<TYPES>> TestDescription<TYPES, I> {
392    /// the default metadata for a stress test
393    #[must_use]
394    #[allow(clippy::redundant_field_names)]
395    pub fn default_stress() -> Self {
396        let num_nodes_with_stake = 100;
397
398        Self {
399            overall_safety_properties: OverallSafetyPropertiesDescription {
400                num_successful_views: 50,
401                ..OverallSafetyPropertiesDescription::default()
402            },
403            timing_data: TimingData {
404                next_view_timeout: 2000,
405                ..TimingData::default()
406            },
407            view_sync_properties: ViewSyncTaskDescription::Threshold(0, num_nodes_with_stake),
408            ..Self::default()
409        }
410    }
411
412    /// the default metadata for multiple rounds
413    #[must_use]
414    #[allow(clippy::redundant_field_names)]
415    pub fn default_multiple_rounds() -> Self {
416        let num_nodes_with_stake = 10;
417        TestDescription::<TYPES, I> {
418            overall_safety_properties: OverallSafetyPropertiesDescription {
419                num_successful_views: 20,
420                ..OverallSafetyPropertiesDescription::default()
421            },
422            timing_data: TimingData {
423                ..TimingData::default()
424            },
425            view_sync_properties: ViewSyncTaskDescription::Threshold(0, num_nodes_with_stake),
426            ..TestDescription::<TYPES, I>::default()
427        }
428    }
429
430    /// Default setting with 20 nodes and 8 views of successful views.
431    #[must_use]
432    #[allow(clippy::redundant_field_names)]
433    pub fn default_more_nodes() -> Self {
434        Self::default_more_nodes_with_stake(TestNodeStakes::default())
435    }
436
437    #[must_use]
438    #[allow(clippy::redundant_field_names)]
439    pub fn default_more_nodes_with_stake(node_stakes: TestNodeStakes) -> Self {
440        let num_nodes_with_stake = 20;
441        let num_da_nodes = 14;
442        let epoch_height = 10;
443        let epoch_start_block = 1;
444
445        let (staked_nodes, da_nodes) =
446            gen_node_lists::<TYPES>(num_nodes_with_stake, num_da_nodes, &node_stakes);
447
448        let upgrade = Upgrade::trivial(MIN_SUPPORTED_VERSION);
449        Self {
450            test_config: default_hotshot_config::<TYPES>(
451                staked_nodes,
452                da_nodes,
453                num_nodes_with_stake.try_into().unwrap(),
454                epoch_height,
455                epoch_start_block,
456            ),
457            upgrade,
458            // The first 14 (i.e., 20 - f) nodes are in the DA committee and we may shutdown the
459            // remaining 6 (i.e., f) nodes. We could remove this restriction after fixing the
460            // following issue.
461            completion_task_description: CompletionTaskDescription::TimeBasedCompletionTaskBuilder(
462                TimeBasedCompletionTaskDescription {
463                    // Increase the duration to get the expected number of successful views.
464                    duration: Duration::from_secs(340),
465                },
466            ),
467            overall_safety_properties: OverallSafetyPropertiesDescription {
468                ..Default::default()
469            },
470            timing_data: TimingData {
471                next_view_timeout: 6000,
472                ..TimingData::default()
473            },
474            view_sync_properties: ViewSyncTaskDescription::Threshold(
475                0,
476                num_nodes_with_stake.try_into().unwrap(),
477            ),
478            node_stakes,
479            ..Self::default()
480        }
481    }
482
483    pub fn set_num_nodes(self, num_nodes: u64, num_da_nodes: u64) -> Self {
484        assert!(
485            num_da_nodes <= num_nodes,
486            "Cannot build test with fewer DA than total nodes. You may have mixed up the \
487             arguments to the function"
488        );
489
490        let (staked_nodes, da_nodes) =
491            gen_node_lists::<TYPES>(num_nodes, num_da_nodes, &self.node_stakes);
492
493        let upgrade = Upgrade::trivial(MIN_SUPPORTED_VERSION);
494        Self {
495            test_config: default_hotshot_config::<TYPES>(
496                staked_nodes,
497                da_nodes,
498                self.test_config.num_bootstrap,
499                self.test_config.epoch_height,
500                self.test_config.epoch_start_block,
501            ),
502            upgrade,
503            ..self
504        }
505    }
506
507    pub fn build_node_key_map(&self) -> Arc<TestNodeKeyMap> {
508        let mut node_key_map = TestNodeKeyMap::new();
509        for i in 0..self.test_config.num_nodes_with_stake.into() {
510            let (private_key, public_key) = key_pair_for_id::<TestTypes>(i as u64);
511            node_key_map.insert(public_key, private_key);
512        }
513
514        Arc::new(node_key_map)
515    }
516
517    #[must_use]
518    pub fn default_with_stake(node_stakes: TestNodeStakes) -> Self {
519        let num_nodes_with_stake = 7;
520        let num_da_nodes = num_nodes_with_stake;
521        let epoch_height = 10;
522        let epoch_start_block = 1;
523
524        let (staked_nodes, da_nodes) =
525            gen_node_lists::<TYPES>(num_nodes_with_stake, num_da_nodes, &node_stakes);
526
527        let upgrade = Upgrade::trivial(MIN_SUPPORTED_VERSION);
528        Self {
529            test_config: default_hotshot_config::<TYPES>(
530                staked_nodes,
531                da_nodes,
532                num_nodes_with_stake.try_into().unwrap(),
533                epoch_height,
534                epoch_start_block,
535            ),
536            upgrade,
537            timing_data: TimingData::default(),
538            skip_late: false,
539            spinning_properties: SpinningTaskDescription {
540                node_changes: vec![],
541            },
542            overall_safety_properties: OverallSafetyPropertiesDescription::default(),
543            // arbitrary, haven't done the math on this
544            txn_description: TxnTaskDescription::RoundRobinTimeBased(Duration::from_millis(100)),
545            completion_task_description: CompletionTaskDescription::TimeBasedCompletionTaskBuilder(
546                TimeBasedCompletionTaskDescription {
547                    duration: Duration::from_secs(120),
548                },
549            ),
550            unreliable_network: None,
551            view_sync_properties: ViewSyncTaskDescription::Threshold(
552                0,
553                num_nodes_with_stake.try_into().unwrap(),
554            ),
555            builders: vec1::vec1![BuilderDescription::default(), BuilderDescription::default(),],
556            fallback_builder: BuilderDescription::default(),
557            solver: FakeSolverApiDescription {
558                // Default to a 10% error rate.
559                error_pct: 0.1,
560            },
561            behaviour: Rc::new(|_| Behaviour::Standard),
562            async_delay_config: HashMap::new(),
563            upgrade_view: None,
564            start_solver: true,
565            validate_transactions: Arc::new(|_| Ok(())),
566            node_stakes,
567        }
568    }
569}
570
571impl<TYPES: NodeType, I: NodeImplementation<TYPES>> Default for TestDescription<TYPES, I> {
572    /// by default, just a single round
573    #[allow(clippy::redundant_field_names)]
574    fn default() -> Self {
575        Self::default_with_stake(TestNodeStakes::default())
576    }
577}
578
579impl<TYPES: NodeType<InstanceState = TestInstanceState>, I: TestableNodeImplementation<TYPES>>
580    TestDescription<TYPES, I>
581where
582    I: NodeImplementation<TYPES>,
583{
584    /// turn a description of a test (e.g. a [`TestDescription`]) into
585    /// a [`TestLauncher`] that can be used to launch the test.
586    /// # Panics
587    /// if some of the configuration values are zero
588    pub fn gen_launcher(self) -> TestLauncher<TYPES, I> {
589        self.gen_launcher_with_tasks(vec![])
590    }
591
592    /// turn a description of a test (e.g. a [`TestDescription`]) into
593    /// a [`TestLauncher`] that can be used to launch the test, with
594    /// additional testing tasks to run in test harness
595    /// # Panics
596    /// if some of the configuration values are zero
597    #[must_use]
598    pub fn gen_launcher_with_tasks(
599        mut self,
600        additional_test_tasks: Vec<Box<dyn TestTaskStateSeed<TYPES, I>>>,
601    ) -> TestLauncher<TYPES, I> {
602        let mut connect_infos = HashMap::new();
603        let networks = <I as TestableNodeImplementation<TYPES>>::gen_networks(
604            self.test_config.num_nodes_with_stake.into(),
605            self.test_config.num_bootstrap,
606            self.test_config.da_staked_committee_size,
607            self.unreliable_network.clone(),
608            self.timing_data.secondary_network_delay,
609            &mut connect_infos,
610        );
611
612        // Update peer configs with address information created by `gen_networks`.
613        for cfg in self.test_config.known_nodes_with_stake.iter_mut() {
614            if let Some(info) = connect_infos.get(&cfg.stake_table_entry.public_key()) {
615                cfg.connect_info = Some(info.clone())
616            }
617        }
618        for cfg in self.test_config.known_da_nodes.iter_mut() {
619            if let Some(info) = connect_infos.get(&cfg.stake_table_entry.public_key()) {
620                cfg.connect_info = Some(info.clone())
621            }
622        }
623
624        let TestDescription {
625            timing_data,
626            test_config,
627            node_stakes,
628            ..
629        } = self.clone();
630
631        let validator_config = Rc::new(move |node_id| {
632            ValidatorConfig::<TYPES>::generated_from_seed_indexed(
633                [0u8; 32],
634                node_id,
635                node_stakes.get(node_id),
636                // This is the config for node 0
637                node_id < test_config.da_staked_committee_size as u64,
638            )
639        });
640
641        let hotshot_config = Rc::new(move |_| test_config.clone());
642
643        let TimingData {
644            next_view_timeout,
645            builder_timeout,
646            data_request_delay,
647            view_sync_timeout,
648            ..
649        } = timing_data;
650
651        // TODO this should really be using the timing config struct
652        let mod_hotshot_config = move |hotshot_config: &mut HotShotConfig<TYPES>| {
653            hotshot_config.next_view_timeout = next_view_timeout;
654            hotshot_config.builder_timeout = builder_timeout;
655            hotshot_config.data_request_delay = data_request_delay;
656            hotshot_config.view_sync_timeout = view_sync_timeout;
657        };
658
659        let metadata = self.clone();
660        TestLauncher {
661            resource_generators: ResourceGenerators {
662                channel_generator: networks,
663                storage: Rc::new(move |node_id| TestStorage::<TYPES> {
664                    delay_config: metadata
665                        .async_delay_config
666                        .get(&node_id)
667                        .cloned()
668                        .unwrap_or_default(),
669                    ..Default::default()
670                }),
671                hotshot_config,
672                validator_config,
673            },
674            metadata: self,
675            additional_test_tasks,
676        }
677        .map_hotshot_config(mod_hotshot_config)
678    }
679}