Skip to main content

vid/
avidm_gf2.rs

1//! This module implements the AVID-M scheme over GF2
2
3use std::{ops::Range, vec};
4
5use anyhow::anyhow;
6use ark_serialize::{CanonicalDeserialize, CanonicalSerialize};
7use jf_merkle_tree::{MerkleTreeScheme, append_only::MerkleTree as JfMerkleTree};
8use p3_maybe_rayon::prelude::*;
9use serde::{Deserialize, Serialize};
10use tagged_base64::tagged;
11
12use crate::{
13    VidError, VidResult, VidScheme,
14    utils::blake3::{Blake3DigestAlgorithm, Blake3Node},
15};
16
17/// Namespaced AvidmGf2 scheme
18pub mod namespaced;
19/// Namespace proofs for AvidmGf2 scheme
20pub mod proofs;
21
22/// Merkle tree scheme used in the VID. Uses BLAKE3 directly via
23/// [`Blake3DigestAlgorithm`] rather than going through the
24/// `jf_merkle_tree::hasher::HasherDigest` blanket impl, which would pin
25/// `blake3` to a `digest 0.10`-compatible release line.
26pub(crate) type MerkleTree = JfMerkleTree<Blake3Node, Blake3DigestAlgorithm, u64, 4, Blake3Node>;
27type MerkleProof = <MerkleTree as MerkleTreeScheme>::MembershipProof;
28type MerkleCommit = <MerkleTree as MerkleTreeScheme>::Commitment;
29
30/// Dummy struct for AVID-M scheme over GF2
31pub struct AvidmGf2Scheme;
32
33/// VID Parameters
34#[derive(Clone, Debug, Hash, Serialize, Deserialize, PartialEq, Eq)]
35pub struct AvidmGf2Param {
36    /// Total weights of all storage nodes
37    pub total_weights: usize,
38    /// Minimum collective weights required to recover the original payload.
39    pub recovery_threshold: usize,
40}
41
42impl AvidmGf2Param {
43    /// Construct a new [`AvidmGf2Param`].
44    pub fn new(recovery_threshold: usize, total_weights: usize) -> VidResult<Self> {
45        if recovery_threshold == 0 || total_weights < recovery_threshold {
46            return Err(VidError::InvalidParam);
47        }
48        Ok(Self {
49            total_weights,
50            recovery_threshold,
51        })
52    }
53}
54
55/// VID Share type to be distributed among the parties.
56#[derive(Clone, Debug, Hash, Serialize, Deserialize, PartialEq, Eq)]
57pub struct AvidmGf2Share {
58    /// Range of this share in the encoded payload.
59    range: Range<usize>,
60    /// Actual share content.
61    payload: Vec<Vec<u8>>,
62    /// Merkle proof of the content.
63    mt_proofs: Vec<MerkleProof>,
64}
65
66impl AvidmGf2Share {
67    /// Get the weight of this share
68    pub fn weight(&self) -> usize {
69        self.range.len()
70    }
71
72    /// Validate the share structure.
73    pub fn validate(&self) -> bool {
74        self.payload.len() == self.range.len() && self.mt_proofs.len() == self.range.len()
75    }
76}
77
78/// VID Commitment type
79#[derive(
80    Clone,
81    Copy,
82    Debug,
83    Default,
84    Hash,
85    CanonicalSerialize,
86    CanonicalDeserialize,
87    Eq,
88    PartialEq,
89    Ord,
90    PartialOrd,
91)]
92#[tagged("AvidmGf2Commit")]
93#[repr(C)]
94pub struct AvidmGf2Commit {
95    /// VID commitment is the Merkle tree root
96    pub commit: MerkleCommit,
97}
98
99impl AsRef<[u8]> for AvidmGf2Commit {
100    fn as_ref(&self) -> &[u8] {
101        self.commit.as_ref()
102    }
103}
104
105impl AsRef<[u8; 32]> for AvidmGf2Commit {
106    fn as_ref(&self) -> &[u8; 32] {
107        <Self as AsRef<[u8]>>::as_ref(self)
108            .try_into()
109            .expect("AvidmGf2Commit is always 32 bytes")
110    }
111}
112
113impl AvidmGf2Scheme {
114    /// Setup an instance for AVID-M scheme
115    pub fn setup(recovery_threshold: usize, total_weights: usize) -> VidResult<AvidmGf2Param> {
116        AvidmGf2Param::new(recovery_threshold, total_weights)
117    }
118
119    /// Build the `original_count` original shards directly from `payload`,
120    /// applying the AvidM-GF2 bit padding (one `0x01` byte at
121    /// `payload.len()` followed by zeros to fill the final shard).
122    ///
123    /// Writing the chunks straight out avoids allocating an intermediate
124    /// `shard_bytes * original_count`-byte buffer just to re-chunk it.
125    fn chunk_and_pad(
126        payload: &[u8],
127        shard_bytes: usize,
128        original_count: usize,
129    ) -> VidResult<Vec<Vec<u8>>> {
130        let padded_len = shard_bytes * original_count;
131        if padded_len < payload.len() + 1 {
132            return Err(VidError::Argument(
133                "Payload length is too large to fit in the given payload length".to_string(),
134            ));
135        }
136        let mut original: Vec<Vec<u8>> = Vec::with_capacity(original_count);
137        for i in 0..original_count {
138            let start = i * shard_bytes;
139            let mut chunk = vec![0u8; shard_bytes];
140            if start < payload.len() {
141                let end = ((i + 1) * shard_bytes).min(payload.len());
142                let take = end - start;
143                chunk[..take].copy_from_slice(&payload[start..end]);
144                if take < shard_bytes {
145                    // Pad byte falls inside this chunk.
146                    chunk[take] = 1u8;
147                }
148            } else if start == payload.len() {
149                // Payload ended exactly on a chunk boundary — pad byte is the
150                // first byte of this all-zero chunk.
151                chunk[0] = 1u8;
152            }
153            original.push(chunk);
154        }
155        Ok(original)
156    }
157
158    fn raw_disperse(
159        param: &AvidmGf2Param,
160        payload: &[u8],
161    ) -> VidResult<(MerkleTree, Vec<Vec<u8>>)> {
162        let original_count = param.recovery_threshold;
163        let recovery_count = param.total_weights - param.recovery_threshold;
164        let mut shard_bytes = (payload.len() + 1).div_ceil(original_count);
165        if shard_bytes % 2 == 1 {
166            shard_bytes += 1;
167        }
168        let original = Self::chunk_and_pad(payload, shard_bytes, original_count)?;
169        let recovery = if recovery_count == 0 {
170            vec![]
171        } else {
172            reed_solomon_simd::encode(original_count, recovery_count, &original)?
173        };
174
175        let shares = [original, recovery].concat();
176        let share_digests: Vec<Blake3Node> = shares
177            .par_iter()
178            .map(|share| Blake3Node::from(blake3::hash(share)))
179            .collect();
180        let mt = MerkleTree::from_elems(None, &share_digests)?;
181        Ok((mt, shares))
182    }
183}
184
185impl VidScheme for AvidmGf2Scheme {
186    type Param = AvidmGf2Param;
187    type Share = AvidmGf2Share;
188    type Commit = AvidmGf2Commit;
189
190    fn commit(param: &Self::Param, payload: &[u8]) -> VidResult<Self::Commit> {
191        let (mt, _) = Self::raw_disperse(param, payload)?;
192        Ok(Self::Commit {
193            commit: mt.commitment(),
194        })
195    }
196
197    fn disperse(
198        param: &Self::Param,
199        distribution: &[u32],
200        payload: &[u8],
201    ) -> VidResult<(Self::Commit, Vec<Self::Share>)> {
202        let total_weights = distribution.iter().map(|&w| w as usize).sum::<usize>();
203        if total_weights != param.total_weights {
204            return Err(VidError::Argument(
205                "Weight distribution is inconsistent with the given param".to_string(),
206            ));
207        }
208        if distribution.contains(&0u32) {
209            return Err(VidError::Argument("Weight cannot be zero".to_string()));
210        }
211        let (mt, shares) = Self::raw_disperse(param, payload)?;
212        let commit = AvidmGf2Commit {
213            commit: mt.commitment(),
214        };
215
216        let ranges: Vec<_> = distribution
217            .iter()
218            .scan(0usize, |sum, w| {
219                let prefix_sum = *sum;
220                *sum += *w as usize;
221                Some(prefix_sum..*sum)
222            })
223            .collect();
224        // Ranges partition `shares` and `proofs` in order. Consume both via
225        // owning iterators instead of `shares[range].to_vec()` /
226        // `proofs[range].to_vec()`, which would heap-clone every Vec<u8>
227        // payload and every per-leaf proof at high num_ns × total_weights.
228        //
229        // `mt.collect_leaves_with_proofs()` returns leaves in ascending
230        // position order (DFS over children 0..ARITY), so we can drain the
231        // iterator directly without an indexed placeholder Vec.
232        let mut shares_iter = shares.into_iter();
233        let payloads: Vec<Vec<Vec<u8>>> = ranges
234            .iter()
235            .map(|range| shares_iter.by_ref().take(range.len()).collect())
236            .collect();
237        let mut proofs_iter = mt
238            .collect_leaves_with_proofs()
239            .into_iter()
240            .map(|(_, _, proof)| proof);
241        let proof_groups: Vec<Vec<MerkleProof>> = ranges
242            .iter()
243            .map(|range| proofs_iter.by_ref().take(range.len()).collect())
244            .collect();
245        // The map body is just a struct construction over already-prepared
246        // owned components — sub-µs per item, smaller than rayon's
247        // per-item scheduling overhead. Stay sequential.
248        let shares: Vec<_> = ranges
249            .into_iter()
250            .zip(payloads)
251            .zip(proof_groups)
252            .map(|((range, payload), mt_proofs)| AvidmGf2Share {
253                range,
254                payload,
255                mt_proofs,
256            })
257            .collect();
258        Ok((commit, shares))
259    }
260
261    fn verify_share(
262        param: &Self::Param,
263        commit: &Self::Commit,
264        share: &Self::Share,
265    ) -> VidResult<crate::VerificationResult> {
266        if !share.validate() || share.range.is_empty() || share.range.end > param.total_weights {
267            return Err(VidError::InvalidShare);
268        }
269        // Each (i, leaf, proof) triple is independent. `find_any` short-
270        // circuits on the first failing position and avoids allocating any
271        // intermediate collection.
272        let start = share.range.start;
273        let len = share.range.end - start;
274        match (0..len)
275            .into_par_iter()
276            .map(|i| -> VidResult<crate::VerificationResult> {
277                let payload_digest = Blake3Node::from(blake3::hash(&share.payload[i]));
278                MerkleTree::verify(
279                    commit.commit,
280                    (start + i) as u64,
281                    payload_digest,
282                    &share.mt_proofs[i],
283                )
284                .map_err(VidError::from)
285            })
286            .find_any(|r| !matches!(r, Ok(Ok(()))))
287        {
288            None => Ok(Ok(())),
289            Some(Ok(v)) => Ok(v),
290            Some(Err(e)) => Err(e),
291        }
292    }
293
294    fn recover(
295        param: &Self::Param,
296        _commit: &Self::Commit,
297        shares: &[Self::Share],
298    ) -> VidResult<Vec<u8>> {
299        let original_count = param.recovery_threshold;
300        let recovery_count = param.total_weights - param.recovery_threshold;
301        // Find the first non-empty share
302        let Some(first_share) = shares.iter().find(|s| !s.payload.is_empty()) else {
303            return Err(VidError::InsufficientShares);
304        };
305        let shard_bytes = first_share.payload[0].len();
306
307        // Track references to input original shards; avoids the per-shard
308        // `.clone()` the previous version did to populate a
309        // `Vec<Option<Vec<u8>>>`. Reconstructed shards come from the decoder
310        // and are copied directly into the output buffer below.
311        let mut input_orig: Vec<Option<&[u8]>> = vec![None; original_count];
312
313        let mut recovered: Vec<u8> = Vec::with_capacity(original_count * shard_bytes);
314        if recovery_count == 0 {
315            // Edge case where there are no recovery shares: every original must
316            // be supplied as input.
317            for share in shares {
318                if !share.validate() || share.payload.iter().any(|p| p.len() != shard_bytes) {
319                    return Err(VidError::InvalidShare);
320                }
321                for (i, index) in share.range.clone().enumerate() {
322                    if index < original_count {
323                        input_orig[index] = Some(&share.payload[i]);
324                    }
325                }
326            }
327            for slot in &input_orig {
328                let shard = slot
329                    .ok_or_else(|| VidError::Internal(anyhow!("Failed to recover the payload.")))?;
330                recovered.extend_from_slice(shard);
331            }
332        } else {
333            let mut decoder = reed_solomon_simd::ReedSolomonDecoder::new(
334                original_count,
335                recovery_count,
336                shard_bytes,
337            )?;
338            for share in shares {
339                if !share.validate() || share.payload.iter().any(|p| p.len() != shard_bytes) {
340                    return Err(VidError::InvalidShare);
341                }
342                for (i, index) in share.range.clone().enumerate() {
343                    let shard = &share.payload[i];
344                    if index < original_count {
345                        input_orig[index] = Some(shard);
346                        decoder.add_original_shard(index, shard)?;
347                    } else {
348                        decoder.add_recovery_shard(index - original_count, shard)?;
349                    }
350                }
351            }
352
353            let result = decoder.decode()?;
354            for (i, shard) in input_orig.iter().enumerate().take(original_count) {
355                let shard: &[u8] = match shard {
356                    Some(data) => data,
357                    None => result.restored_original(i).ok_or_else(|| {
358                        VidError::Internal(anyhow!("Failed to recover the payload."))
359                    })?,
360                };
361                recovered.extend_from_slice(shard);
362            }
363        }
364        match recovered.iter().rposition(|&b| b != 0) {
365            Some(pad_index) if recovered[pad_index] == 1u8 => {
366                recovered.truncate(pad_index);
367                Ok(recovered)
368            },
369            _ => Err(VidError::Argument(
370                "Malformed payload, cannot find the padding position".to_string(),
371            )),
372        }
373    }
374}
375
376/// Unit tests
377#[cfg(test)]
378pub mod tests {
379    use rand::{RngCore, seq::SliceRandom};
380
381    use super::AvidmGf2Scheme;
382    use crate::VidScheme;
383
384    #[test]
385    fn round_trip() {
386        // play with these items
387        let num_storage_nodes_list = [4, 9, 16];
388        let payload_byte_lens = [1, 31, 32, 500];
389
390        // more items as a function of the above
391
392        let mut rng = jf_utils::test_rng();
393
394        for num_storage_nodes in num_storage_nodes_list {
395            let weights: Vec<u32> = (0..num_storage_nodes)
396                .map(|_| rng.next_u32() % 5 + 1)
397                .collect();
398            let total_weights: u32 = weights.iter().sum();
399            let recovery_threshold = total_weights.div_ceil(3) as usize;
400            let params = AvidmGf2Scheme::setup(recovery_threshold, total_weights as usize).unwrap();
401
402            for payload_byte_len in payload_byte_lens {
403                let payload = {
404                    let mut bytes_random = vec![0u8; payload_byte_len];
405                    rng.fill_bytes(&mut bytes_random);
406                    bytes_random
407                };
408
409                let (commit, mut shares) =
410                    AvidmGf2Scheme::disperse(&params, &weights, &payload).unwrap();
411
412                assert_eq!(shares.len(), num_storage_nodes);
413
414                // verify shares
415                shares.iter().for_each(|share| {
416                    assert!(
417                        AvidmGf2Scheme::verify_share(&params, &commit, share)
418                            .is_ok_and(|r| r.is_ok())
419                    )
420                });
421
422                // test payload recovery on a random subset of shares
423                shares.shuffle(&mut rng);
424                let mut cumulated_weights = 0;
425                let mut cut_index = 0;
426                while cumulated_weights < recovery_threshold {
427                    cumulated_weights += shares[cut_index].weight();
428                    cut_index += 1;
429                }
430                let payload_recovered =
431                    AvidmGf2Scheme::recover(&params, &commit, &shares[..cut_index]).unwrap();
432                assert_eq!(payload_recovered, payload);
433            }
434        }
435    }
436
437    #[test]
438    fn round_trip_edge_case() {
439        // play with these items
440        let num_storage_nodes_list = [4, 9, 16];
441        let payload_byte_lens = [1, 31, 32, 500];
442
443        // more items as a function of the above
444
445        let mut rng = jf_utils::test_rng();
446
447        for num_storage_nodes in num_storage_nodes_list {
448            let weights: Vec<u32> = (0..num_storage_nodes)
449                .map(|_| rng.next_u32() % 5 + 1)
450                .collect();
451            let total_weights: u32 = weights.iter().sum();
452            let recovery_threshold = total_weights as usize;
453            let params = AvidmGf2Scheme::setup(recovery_threshold, total_weights as usize).unwrap();
454
455            for payload_byte_len in payload_byte_lens {
456                let payload = {
457                    let mut bytes_random = vec![0u8; payload_byte_len];
458                    rng.fill_bytes(&mut bytes_random);
459                    bytes_random
460                };
461
462                let (commit, mut shares) =
463                    AvidmGf2Scheme::disperse(&params, &weights, &payload).unwrap();
464
465                assert_eq!(shares.len(), num_storage_nodes);
466
467                // verify shares
468                shares.iter().for_each(|share| {
469                    assert!(
470                        AvidmGf2Scheme::verify_share(&params, &commit, share)
471                            .is_ok_and(|r| r.is_ok())
472                    )
473                });
474
475                // test payload recovery on a random subset of shares
476                shares.shuffle(&mut rng);
477                let payload_recovered =
478                    AvidmGf2Scheme::recover(&params, &commit, &shares[..]).unwrap();
479                assert_eq!(payload_recovered, payload);
480            }
481        }
482    }
483
484    #[test]
485    fn disperse_rejects_inconsistent_distribution() {
486        let total_weights = 10usize;
487        let recovery_threshold = 4;
488        let params = AvidmGf2Scheme::setup(recovery_threshold, total_weights).unwrap();
489        let payload = vec![1u8; 100];
490
491        // distribution sums to 12, but param says total_weights=10
492        let bad_weights = vec![3u32; 4];
493        assert!(
494            AvidmGf2Scheme::disperse(&params, &bad_weights, &payload).is_err(),
495            "disperse should reject distribution that doesn't sum to total_weights"
496        );
497
498        // distribution contains a zero weight
499        let zero_weight = vec![0u32, 5, 5];
500        assert!(
501            AvidmGf2Scheme::disperse(&params, &zero_weight, &payload).is_err(),
502            "disperse should reject zero-weight entries"
503        );
504
505        // correct distribution should succeed
506        let good_weights = vec![2u32; 5];
507        assert!(AvidmGf2Scheme::disperse(&params, &good_weights, &payload).is_ok());
508    }
509
510    #[test]
511    fn verify_share_rejects_out_of_range() {
512        let total_weights = 10usize;
513        let recovery_threshold = 4;
514        let params = AvidmGf2Scheme::setup(recovery_threshold, total_weights).unwrap();
515        let payload = vec![1u8; 100];
516        let weights = vec![2u32; 5];
517
518        let (commit, shares) = AvidmGf2Scheme::disperse(&params, &weights, &payload).unwrap();
519
520        // valid shares pass
521        for share in &shares {
522            assert!(AvidmGf2Scheme::verify_share(&params, &commit, share).is_ok_and(|r| r.is_ok()));
523        }
524
525        // a share verified against a smaller param should be rejected
526        let smaller_params = AvidmGf2Scheme::setup(2, 5).unwrap();
527        let last_share = shares.last().unwrap();
528        assert!(
529            AvidmGf2Scheme::verify_share(&smaller_params, &commit, last_share).is_err(),
530            "verify_share should reject share with range.end > param.total_weights"
531        );
532    }
533}