vid/
avidm.rs

//! This module implements the AVID-M scheme, which is named after the scheme
//! described in the DispersedLedger paper <https://www.usenix.org/conference/nsdi22/presentation/yang>.
//!
//! To disperse a payload to a number of storage nodes according to a weight
//! distribution, the payload is first converted into field elements and then
//! divided into chunks of `k` elements each. Each chunk is then encoded into
//! `n` field elements using a Reed-Solomon code. The parameter `n` equals the
//! total weight of all storage nodes, and `k` is the minimum collective weight
//! required to recover the original payload. After the encoding, the result
//! can be viewed as `n` vectors of field elements, each of length equal to the
//! number of chunks. The VID commitment is obtained by Merklizing these `n`
//! vectors. For dispersal, each storage node gets some of these vectors,
//! together with their Merkle proofs, according to its weight.
13
14use std::{collections::HashMap, iter, ops::Range};
15
16use ark_ff::PrimeField;
17use ark_poly::{EvaluationDomain, Radix2EvaluationDomain};
18use ark_serialize::{CanonicalDeserialize, CanonicalSerialize};
19use ark_std::{end_timer, start_timer};
20use config::AvidMConfig;
21use jf_merkle_tree::MerkleTreeScheme;
22use jf_utils::canonical;
23use p3_maybe_rayon::prelude::{
24    IntoParallelIterator, IntoParallelRefIterator, ParallelIterator, ParallelSlice,
25};
26use serde::{Deserialize, Serialize};
27use tagged_base64::tagged;
28
29use crate::{
30    VidError, VidResult, VidScheme,
31    utils::bytes_to_field::{self, bytes_to_field, field_to_bytes},
32};
33
34mod config;
35
36pub mod namespaced;
37pub mod proofs;
38
39#[cfg(all(not(feature = "sha256"), not(feature = "keccak256")))]
40type Config = config::Poseidon2Config;
41#[cfg(feature = "sha256")]
42type Config = config::Sha256Config;
43#[cfg(feature = "keccak256")]
44type Config = config::Keccak256Config;
45
46// Type alias for convenience
47type F = <Config as AvidMConfig>::BaseField;
48type MerkleTree = <Config as AvidMConfig>::MerkleTree;
49type MerkleProof = <MerkleTree as MerkleTreeScheme>::MembershipProof;
50type MerkleCommit = <MerkleTree as MerkleTreeScheme>::Commitment;
51
52/// Commit type for AVID-M scheme.
53#[derive(
54    Clone,
55    Copy,
56    Debug,
57    Default,
58    Hash,
59    CanonicalSerialize,
60    CanonicalDeserialize,
61    Eq,
62    PartialEq,
63    Ord,
64    PartialOrd,
65)]
66#[tagged("AvidMCommit")]
67#[repr(C)]
68pub struct AvidMCommit {
69    /// Root commitment of the Merkle tree.
70    pub commit: MerkleCommit,
71}
72
73impl AsRef<[u8]> for AvidMCommit {
74    fn as_ref(&self) -> &[u8] {
75        unsafe {
76            ::core::slice::from_raw_parts(
77                (self as *const Self) as *const u8,
78                ::core::mem::size_of::<Self>(),
79            )
80        }
81    }
82}
83
84impl AsRef<[u8; 32]> for AvidMCommit {
85    fn as_ref(&self) -> &[u8; 32] {
86        unsafe { ::core::slice::from_raw_parts((self as *const Self) as *const u8, 32) }
87            .try_into()
88            .unwrap()
89    }
90}
91
92/// Share type to be distributed among the parties.
93#[derive(Clone, Debug, Hash, Serialize, Deserialize, Eq, PartialEq)]
94pub struct RawAvidMShare {
95    /// Range of this share in the encoded payload.
96    range: Range<usize>,
97    /// Actual share content.
98    #[serde(with = "canonical")]
99    payload: Vec<Vec<F>>,
100    /// Merkle proof of the content.
101    #[serde(with = "canonical")]
102    mt_proofs: Vec<MerkleProof>,
103}
104
105/// Share type to be distributed among the parties.
106#[derive(Clone, Debug, Hash, Serialize, Deserialize, Eq, PartialEq)]
107pub struct AvidMShare {
108    /// Index number of the given share.
109    index: u32,
110    /// The length of payload in bytes.
111    payload_byte_len: usize,
112    /// Content of this AvidMShare.
113    content: RawAvidMShare,
114}
115
116/// Public parameters of the AVID-M scheme.
117#[derive(Clone, Debug, Hash, Serialize, Deserialize, PartialEq, Eq)]
118pub struct AvidMParam {
119    /// Total weights of all storage nodes
120    pub total_weights: usize,
121    /// Minimum collective weights required to recover the original payload.
122    pub recovery_threshold: usize,
123}
124
125impl AvidMParam {
126    /// Construct a new [`AvidMParam`].
127    pub fn new(recovery_threshold: usize, total_weights: usize) -> VidResult<Self> {
128        if recovery_threshold == 0 || total_weights < recovery_threshold {
129            return Err(VidError::InvalidParam);
130        }
131        Ok(Self {
132            total_weights,
133            recovery_threshold,
134        })
135    }
136}
137
138/// Helper: initialize a FFT domain
139#[inline]
140fn radix2_domain<F: PrimeField>(domain_size: usize) -> VidResult<Radix2EvaluationDomain<F>> {
141    Radix2EvaluationDomain::<F>::new(domain_size).ok_or_else(|| VidError::InvalidParam)
142}
143
144/// Dummy struct for AVID-M scheme.
145pub struct AvidMScheme;
146
147impl AvidMScheme {
148    /// Setup an instance for AVID-M scheme
149    pub fn setup(recovery_threshold: usize, total_weights: usize) -> VidResult<AvidMParam> {
150        AvidMParam::new(recovery_threshold, total_weights)
151    }
152}
153
154impl AvidMScheme {
155    /// Helper function.
156    /// Transform the payload bytes into a list of fields elements.
157    /// This function also pads the bytes with a 1 in the end, following by many 0's
158    /// until the length of the output is a multiple of `param.recovery_threshold`.
159    fn pad_to_fields(param: &AvidMParam, payload: &[u8]) -> Vec<F> {
160        // The number of bytes that can be encoded into a single F element.
161        let elem_bytes_len = bytes_to_field::elem_byte_capacity::<F>();
162
163        // A "chunk" is a byte slice whose size holds exactly `recovery_threshold`
164        // F elements.
165        let num_bytes_per_chunk = param.recovery_threshold * elem_bytes_len;
166
167        let remainder = (payload.len() + 1) % num_bytes_per_chunk;
168        let pad_num_zeros = (num_bytes_per_chunk - remainder) % num_bytes_per_chunk;
169
170        // Pad the payload with a 1 and many 0's.
171        bytes_to_field::<_, F>(
172            payload
173                .iter()
174                .chain(iter::once(&1u8))
175                .chain(iter::repeat_n(&0u8, pad_num_zeros)),
176        )
177        .collect()
178    }
179
180    /// Helper function.
181    /// Let `k = recovery_threshold` and `n = total_weights`. This function
182    /// partition the `payload` into many chunks, each containing `k` field
183    /// elements. Then each chunk is encoded into `n` field element with Reed
184    /// Solomon erasure code. They are then re-organized as `n` vectors, each
185    /// collecting one field element from each chunk. These `n` vectors are
186    /// then Merklized for commitment and membership proof generation.
187    #[allow(clippy::type_complexity)]
188    #[inline]
189    fn raw_encode(param: &AvidMParam, payload: &[F]) -> VidResult<(MerkleTree, Vec<Vec<F>>)> {
190        let domain = radix2_domain::<F>(param.total_weights)?; // See docs at `domains`.
191
192        let encoding_timer = start_timer!(|| "Encoding payload");
193
194        // RS-encode each chunk
195        let codewords: Vec<_> = payload
196            .par_chunks(param.recovery_threshold)
197            .map(|chunk| {
198                let mut fft_vec = domain.fft(chunk); // RS-encode the chunk
199                fft_vec.truncate(param.total_weights); // truncate the useless evaluations
200                fft_vec
201            })
202            .collect();
203        // Generate `total_weights` raw shares. Each share collects one field element
204        // from each encode chunk.
205        let raw_shares: Vec<_> = (0..param.total_weights)
206            .into_par_iter()
207            .map(|i| codewords.iter().map(|v| v[i]).collect::<Vec<F>>())
208            .collect();
209        end_timer!(encoding_timer);
210
211        let hash_timer = start_timer!(|| "Compressing each raw share");
212        let compressed_raw_shares = raw_shares
213            .par_iter()
214            .map(|v| Config::raw_share_digest(v))
215            .collect::<Result<Vec<_>, _>>()?;
216        end_timer!(hash_timer);
217
218        let mt_timer = start_timer!(|| "Constructing Merkle tree");
219        let mt = MerkleTree::from_elems(None, &compressed_raw_shares)?;
220        end_timer!(mt_timer);
221
222        Ok((mt, raw_shares))
223    }
224
225    /// Short hand for `pad_to_field` and `raw_encode`.
226    fn pad_and_encode(param: &AvidMParam, payload: &[u8]) -> VidResult<(MerkleTree, Vec<Vec<F>>)> {
227        let payload = Self::pad_to_fields(param, payload);
228        Self::raw_encode(param, &payload)
229    }
230
231    /// Consume in the constructed Merkle tree and the raw shares from `raw_encode`, provide the AvidM commitment and shares.
232    fn distribute_shares(
233        param: &AvidMParam,
234        distribution: &[u32],
235        mt: MerkleTree,
236        raw_shares: Vec<Vec<F>>,
237        payload_byte_len: usize,
238    ) -> VidResult<(AvidMCommit, Vec<AvidMShare>)> {
239        // let payload_byte_len = payload.len();
240        let total_weights = distribution.iter().map(|&w| w as usize).sum::<usize>();
241        if total_weights != param.total_weights {
242            return Err(VidError::Argument(
243                "Weight distribution is inconsistent with the given param".to_string(),
244            ));
245        }
246        if distribution.contains(&0u32) {
247            return Err(VidError::Argument("Weight cannot be zero".to_string()));
248        }
249
250        let distribute_timer = start_timer!(|| "Distribute codewords to the storage nodes");
251        // Distribute the raw shares to each storage node according to the weight
252        // distribution. For each chunk, storage `i` gets `distribution[i]`
253        // consecutive raw shares ranging as `ranges[i]`.
254        let ranges: Vec<_> = distribution
255            .iter()
256            .scan(0usize, |sum, w| {
257                let prefix_sum = *sum;
258                *sum += *w as usize;
259                Some(prefix_sum..*sum)
260            })
261            .collect();
262        let shares: Vec<_> = ranges
263            .par_iter()
264            .map(|range| {
265                range
266                    .clone()
267                    .map(|k| raw_shares[k].to_owned())
268                    .collect::<Vec<_>>()
269            })
270            .collect();
271        end_timer!(distribute_timer);
272
273        let mt_proof_timer = start_timer!(|| "Generate Merkle tree proofs");
274        let shares = shares
275            .into_iter()
276            .enumerate()
277            .map(|(i, payload)| AvidMShare {
278                index: i as u32,
279                payload_byte_len,
280                content: RawAvidMShare {
281                    range: ranges[i].clone(),
282                    payload,
283                    mt_proofs: ranges[i]
284                        .clone()
285                        .map(|k| {
286                            mt.lookup(k as u64)
287                                .expect_ok()
288                                .expect("MT lookup shouldn't fail")
289                                .1
290                        })
291                        .collect::<Vec<_>>(),
292                },
293            })
294            .collect::<Vec<_>>();
295        end_timer!(mt_proof_timer);
296
297        let commit = AvidMCommit {
298            commit: mt.commitment(),
299        };
300
301        Ok((commit, shares))
302    }
303
304    pub(crate) fn verify_internal(
305        param: &AvidMParam,
306        commit: &AvidMCommit,
307        share: &RawAvidMShare,
308    ) -> VidResult<crate::VerificationResult> {
309        if share.range.is_empty()
310            || share.range.end > param.total_weights
311            || share.range.len() != share.payload.len()
312            || share.range.len() != share.mt_proofs.len()
313        {
314            return Err(VidError::InvalidShare);
315        }
316        for (i, index) in share.range.clone().enumerate() {
317            let compressed_payload = Config::raw_share_digest(&share.payload[i])?;
318            if MerkleTree::verify(
319                commit.commit,
320                index as u64,
321                compressed_payload,
322                &share.mt_proofs[i],
323            )?
324            .is_err()
325            {
326                return Ok(Err(()));
327            }
328        }
329        Ok(Ok(()))
330    }
331
332    pub(crate) fn recover_fields(param: &AvidMParam, shares: &[AvidMShare]) -> VidResult<Vec<F>> {
333        let recovery_threshold: usize = param.recovery_threshold;
334
335        // Each share's payload contains some evaluations from `num_polys`
336        // polynomials.
337        let num_polys = shares
338            .iter()
339            .find(|s| !s.content.payload.is_empty())
340            .ok_or(VidError::Argument("All shares are empty".to_string()))?
341            .content
342            .payload[0]
343            .len();
344
345        let mut raw_shares = HashMap::new();
346        for share in shares {
347            if share.content.range.len() != share.content.payload.len()
348                || share.content.range.end > param.total_weights
349            {
350                return Err(VidError::InvalidShare);
351            }
352            for (i, p) in share.content.range.clone().zip(&share.content.payload) {
353                if p.len() != num_polys {
354                    return Err(VidError::InvalidShare);
355                }
356                if raw_shares.contains_key(&i) {
357                    return Err(VidError::InvalidShare);
358                }
359                raw_shares.insert(i, p);
360                if raw_shares.len() >= recovery_threshold {
361                    break;
362                }
363            }
364            if raw_shares.len() >= recovery_threshold {
365                break;
366            }
367        }
368
369        if raw_shares.len() < recovery_threshold {
370            return Err(VidError::InsufficientShares);
371        }
372
373        let domain = radix2_domain::<F>(param.total_weights)?;
374
375        // Lagrange interpolation
376        // step 1: find all evaluation points and their raw shares
377        let (x, raw_shares): (Vec<_>, Vec<_>) = raw_shares
378            .into_iter()
379            .map(|(i, p)| (domain.element(i), p))
380            .unzip();
381        // step 2: interpolate each polynomial
382        Ok((0..num_polys)
383            .into_par_iter()
384            .map(|poly_index| {
385                jf_utils::reed_solomon_code::reed_solomon_erasure_decode(
386                    x.iter().zip(raw_shares.iter().map(|p| p[poly_index])),
387                    recovery_threshold,
388                )
389                .map_err(|err| VidError::Internal(err.into()))
390            })
391            .collect::<Result<Vec<_>, _>>()?
392            .into_iter()
393            .flatten()
394            .collect())
395    }
396}
397
398impl VidScheme for AvidMScheme {
399    type Param = AvidMParam;
400
401    type Share = AvidMShare;
402
403    type Commit = AvidMCommit;
404
405    fn commit(param: &Self::Param, payload: &[u8]) -> VidResult<Self::Commit> {
406        let (mt, _) = Self::pad_and_encode(param, payload)?;
407        Ok(AvidMCommit {
408            commit: mt.commitment(),
409        })
410    }
411
412    fn disperse(
413        param: &Self::Param,
414        distribution: &[u32],
415        payload: &[u8],
416    ) -> VidResult<(Self::Commit, Vec<Self::Share>)> {
417        let (mt, raw_shares) = Self::pad_and_encode(param, payload)?;
418        Self::distribute_shares(param, distribution, mt, raw_shares, payload.len())
419    }
420
421    fn verify_share(
422        param: &Self::Param,
423        commit: &Self::Commit,
424        share: &Self::Share,
425    ) -> VidResult<crate::VerificationResult> {
426        Self::verify_internal(param, commit, &share.content)
427    }
428
429    /// Recover payload data from shares.
430    ///
431    /// # Requirements
432    /// - Total weight of all shares must be at least `recovery_threshold`.
433    /// - Each share's `payload` must have equal length.
434    /// - All shares must be verified under the given commitment.
435    ///
436    /// Shares beyond `recovery_threshold` are ignored.
437    fn recover(
438        param: &Self::Param,
439        _commit: &Self::Commit,
440        shares: &[Self::Share],
441    ) -> VidResult<Vec<u8>> {
442        let mut bytes: Vec<u8> = field_to_bytes(Self::recover_fields(param, shares)?).collect();
443        // Remove the trimming zeros and the last 1 to get the actual payload bytes.
444        // See `pad_to_fields`.
445        if let Some(pad_index) = bytes.iter().rposition(|&b| b != 0)
446            && bytes[pad_index] == 1u8
447        {
448            bytes.truncate(pad_index);
449            return Ok(bytes);
450        }
451        Err(VidError::Argument(
452            "Malformed payload, cannot find the padding position".to_string(),
453        ))
454    }
455}
456
/// Unit tests
#[cfg(test)]
pub mod tests {
    use rand::{RngCore, seq::SliceRandom};

    use super::F;
    use crate::{VidScheme, avidm::AvidMScheme, utils::bytes_to_field};

    /// Checks the output of `pad_to_fields` for a short payload and for one
    /// that fills a whole chunk.
    #[test]
    fn test_padding() {
        let elem_bytes_len = bytes_to_field::elem_byte_capacity::<F>();
        let param = AvidMScheme::setup(2usize, 5usize).unwrap();
        let bytes = vec![2u8; 1];
        let padded = AvidMScheme::pad_to_fields(&param, &bytes);
        assert_eq!(padded.len(), 2usize);
        // The padded bytes are `[2, 1, 0, ...]`, i.e. 2 + 256 in the first
        // element.
        assert_eq!(padded, [F::from(2u32 + u8::MAX as u32 + 1), F::from(0)]);

        // A payload that fills a chunk exactly needs a second chunk for the
        // padding marker.
        let bytes = vec![2u8; elem_bytes_len * 2];
        let padded = AvidMScheme::pad_to_fields(&param, &bytes);
        assert_eq!(padded.len(), 4usize);
    }

    /// Disperses random payloads, verifies every share and recovers the
    /// payload from a random subset of shares.
    #[test]
    fn round_trip() {
        // play with these items
        let params_list = [(2, 4), (3, 9), (5, 6), (15, 16)];
        let payload_byte_lens = [1, 31, 32, 500];

        let mut rng = jf_utils::test_rng();

        for (recovery_threshold, num_storage_nodes) in params_list {
            let weights: Vec<u32> = (0..num_storage_nodes)
                .map(|_| rng.next_u32() % 5 + 1)
                .collect();
            let total_weights: u32 = weights.iter().sum();
            let params = AvidMScheme::setup(recovery_threshold, total_weights as usize).unwrap();

            for payload_byte_len in payload_byte_lens {
                println!(
                    "recovery_threshold:: {recovery_threshold} num_storage_nodes: \
                     {num_storage_nodes} payload_byte_len: {payload_byte_len}"
                );
                println!("weights: {weights:?}");

                let payload = {
                    let mut bytes_random = vec![0u8; payload_byte_len];
                    rng.fill_bytes(&mut bytes_random);
                    bytes_random
                };

                let (commit, mut shares) =
                    AvidMScheme::disperse(&params, &weights, &payload).unwrap();

                assert_eq!(shares.len(), num_storage_nodes);

                // verify shares
                shares.iter().for_each(|share| {
                    assert!(
                        AvidMScheme::verify_share(&params, &commit, share).is_ok_and(|r| r.is_ok())
                    )
                });

                // test payload recovery on a random subset of shares: take
                // shares only until their collective weight reaches the
                // recovery threshold, so that recovery is exercised with the
                // minimum sufficient weight rather than with a surplus.
                shares.shuffle(&mut rng);
                let mut cumulated_weights = 0;
                let mut cut_index = 0;
                while cumulated_weights < recovery_threshold {
                    cumulated_weights += shares[cut_index].content.range.len();
                    cut_index += 1;
                }
                let payload_recovered =
                    AvidMScheme::recover(&params, &commit, &shares[..cut_index]).unwrap();
                assert_eq!(payload_recovered, payload);
            }
        }
    }

    /// Times dispersal and recovery of a large payload; only built with the
    /// `print-trace` feature.
    #[test]
    #[cfg(feature = "print-trace")]
    fn round_trip_breakdown() {
        use ark_std::{end_timer, start_timer};

        let mut rng = jf_utils::test_rng();

        let params = AvidMScheme::setup(50usize, 200usize).unwrap();
        let weights = vec![2u32; 100usize];
        let payload_byte_len = 1024 * 1024 * 32; // 32MB

        let payload = {
            let mut bytes_random = vec![0u8; payload_byte_len];
            rng.fill_bytes(&mut bytes_random);
            bytes_random
        };

        let disperse_timer = start_timer!(|| format!("Disperse {} bytes", payload_byte_len));
        let (commit, shares) = AvidMScheme::disperse(&params, &weights, &payload).unwrap();
        end_timer!(disperse_timer);

        let recover_timer = start_timer!(|| "Recovery");
        AvidMScheme::recover(&params, &commit, &shares).unwrap();
        end_timer!(recover_timer);
    }
}
561}