Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion gix-hash/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ test = false

[features]
default = ["sha1"]
## Support for SHA-1 digests.
## Support for SHA1 digests.
sha1 = []
## Support for SHA256 digests.
sha256 = []
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
serde = ["dep:serde", "faster-hex/serde"]

Expand All @@ -29,6 +31,7 @@ thiserror = "2.0.17"
faster-hex = { version = "0.10.0", default-features = false, features = ["std"] }
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
sha1-checked = { version = "0.10.0", default-features = false }
sha2 = { version = "0.10.0", default-features = false }

document-features = { version = "0.2.0", optional = true }

Expand Down
77 changes: 49 additions & 28 deletions gix-hash/src/hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,53 +11,72 @@ pub(super) mod _impl {

use crate::hasher::Error;

/// An implementation of the Sha1 hash, which can be used once.
///
/// We use [`sha1_checked`] to implement the same collision detection
/// algorithm as Git.
/// Hash implementations that can be used once.
#[derive(Clone)]
pub struct Hasher(sha1_checked::Sha1);
pub enum Hasher {
/// An implementation of the SHA1 hash.
///
/// We use [`sha1_checked`] to implement the same collision detection algorithm as Git.
Sha1(sha1_checked::Sha1),
/// An implementation of the SHA256 hash.
#[cfg(feature = "sha256")]
Sha256(sha2::Sha256),
}

impl Hasher {
/// Let's not provide a public default implementation to force people to go through [`hasher()`].
fn default() -> Self {
/// Let's not make this public to force people to go through [`hasher()`].
fn new_sha1() -> Self {
// This matches the configuration used by Git, which only uses
// the collision detection to bail out, rather than computing
// alternate “safe hashes” for inputs where a collision attack
// was detected.
Self(sha1_checked::Builder::default().safe_hash(false).build())
Self::Sha1(sha1_checked::Builder::default().safe_hash(false).build())
}

/// Let's not make this public to force people to go through [`hasher()`].
#[cfg(feature = "sha256")]
fn new_sha256() -> Self {
Self::Sha256(sha2::Sha256::new())
}
}

impl Hasher {
/// Digest the given `bytes`.
pub fn update(&mut self, bytes: &[u8]) {
self.0.update(bytes);
match self {
Hasher::Sha1(sha1) => sha1.update(bytes),
#[cfg(feature = "sha256")]
Hasher::Sha256(sha256) => sha256.update(bytes),
}
}

/// Finalize the hash and produce an object ID.
/// Finalize the hash and produce an object id.
///
/// Returns [`Error`] if a collision attack is detected.
#[inline]
pub fn try_finalize(self) -> Result<crate::ObjectId, Error> {
match self.0.try_finalize() {
CollisionResult::Ok(digest) => Ok(crate::ObjectId::Sha1(digest.into())),
CollisionResult::Mitigated(_) => {
// SAFETY: `CollisionResult::Mitigated` is only
// returned when `safe_hash()` is on. `Hasher`’s field
// is private, and we only construct it in the
// `Default` instance, which turns `safe_hash()` off.
//
// As of Rust 1.84.1, the compiler can’t figure out
// this function cannot panic without this.
#[allow(unsafe_code)]
unsafe {
std::hint::unreachable_unchecked()
match self {
Hasher::Sha1(sha1) => match sha1.try_finalize() {
CollisionResult::Ok(digest) => Ok(crate::ObjectId::Sha1(digest.into())),
CollisionResult::Mitigated(_) => {
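// SAFETY: `CollisionResult::Mitigated` is only
// returned when `safe_hash()` is on, and `new_sha1()`,
// where we construct the `Sha1` variant, turns
// `safe_hash()` off.
// NOTE(review): `Hasher` is now a `pub enum`, so the payload
// of its variants is no longer private. Confirm that code
// outside this module cannot build a `Hasher::Sha1` with
// `safe_hash()` on, otherwise this is undefined behaviour.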
//
// As of Rust 1.84.1, the compiler can’t figure out
// this function cannot panic without this.
#[allow(unsafe_code)]
unsafe {
std::hint::unreachable_unchecked()
}
}
}
CollisionResult::Collision(digest) => Err(Error::CollisionAttack {
digest: crate::ObjectId::Sha1(digest.into()),
}),
CollisionResult::Collision(digest) => Err(Error::CollisionAttack {
digest: crate::ObjectId::Sha1(digest.into()),
}),
},
#[cfg(feature = "sha256")]
Hasher::Sha256(sha256) => Ok(crate::ObjectId::Sha256(sha256.finalize().into())),
}
}
}
Expand All @@ -66,7 +85,9 @@ pub(super) mod _impl {
#[inline]
pub fn hasher(kind: crate::Kind) -> Hasher {
match kind {
crate::Kind::Sha1 => Hasher::default(),
crate::Kind::Sha1 => Hasher::new_sha1(),
#[cfg(feature = "sha256")]
crate::Kind::Sha256 => Hasher::new_sha256(),
}
}
}
5 changes: 5 additions & 0 deletions gix-hash/src/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ pub(super) mod _impl {
inner,
hash: crate::hasher(object_hash),
},
#[cfg(feature = "sha256")]
crate::Kind::Sha256 => Write {
inner,
hash: crate::hasher(object_hash),
},
}
}
}
Expand Down
53 changes: 45 additions & 8 deletions gix-hash/src/kind.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
use std::str::FromStr;

use crate::{oid, Kind, ObjectId};
use crate::{oid, Kind, ObjectId, SIZE_OF_SHA1_DIGEST, SIZE_OF_SHA1_HEX_DIGEST};

#[cfg(feature = "sha256")]
use crate::{SIZE_OF_SHA256_DIGEST, SIZE_OF_SHA256_HEX_DIGEST};

impl TryFrom<u8> for Kind {
type Error = u8;

fn try_from(value: u8) -> Result<Self, Self::Error> {
Ok(match value {
1 => Kind::Sha1,
#[cfg(feature = "sha256")]
2 => Kind::Sha256,
unknown => return Err(unknown),
})
}
Expand All @@ -19,6 +24,8 @@ impl FromStr for Kind {
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"sha1" | "SHA1" => Kind::Sha1,
#[cfg(feature = "sha256")]
"sha256" | "SHA256" => Kind::Sha256,
other => return Err(other.into()),
})
}
Expand All @@ -28,6 +35,8 @@ impl std::fmt::Display for Kind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Kind::Sha1 => f.write_str("SHA1"),
#[cfg(feature = "sha256")]
Kind::Sha256 => f.write_str("SHA256"),
}
}
}
Expand All @@ -36,13 +45,27 @@ impl Kind {
/// Returns the shortest hash we support.
#[inline]
pub const fn shortest() -> Self {
Self::Sha1
#[cfg(all(not(feature = "sha1"), feature = "sha256"))]
{
Self::Sha256
}
#[cfg(feature = "sha1")]
{
Self::Sha1
}
}

/// Returns the longest hash we support.
#[inline]
pub const fn longest() -> Self {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that there is this logic, it feels like tests for longest/shortest would be good. They'd have to repeat the feature flags so I am not a huge fan, but it really feels like there should be something in place to prevent this from going wrong, ever.
I'd be OK if you said 'better not' as this seems to be correct and when Git introduces a new hash in 10y+, we'd be looking at a completely different environment anyway.

Self::Sha1
#[cfg(feature = "sha256")]
{
Self::Sha256
}
#[cfg(all(not(feature = "sha256"), feature = "sha1"))]
{
Self::Sha1
}
}

/// Returns a buffer suitable to hold the longest possible hash in hex.
Expand All @@ -61,23 +84,31 @@ impl Kind {
#[inline]
pub const fn len_in_hex(&self) -> usize {
match self {
Kind::Sha1 => 40,
Kind::Sha1 => SIZE_OF_SHA1_HEX_DIGEST,
#[cfg(feature = "sha256")]
Kind::Sha256 => SIZE_OF_SHA256_HEX_DIGEST,
}
}

/// Returns the amount of bytes taken up by the hash of this instance.
#[inline]
pub const fn len_in_bytes(&self) -> usize {
match self {
Kind::Sha1 => 20,
Kind::Sha1 => SIZE_OF_SHA1_DIGEST,
#[cfg(feature = "sha256")]
Kind::Sha256 => SIZE_OF_SHA256_DIGEST,
}
}

/// Returns the kind of hash that would fit the given `hex_len`, or `None` if there is no fitting hash.
/// Note that `0` as `hex_len` up to 40 always yields `Sha1`.
/// Note that any `hex_len` from `0` up to 40 always yields `SHA1`, while anything in the range
/// `41..=64` always yields `SHA256` if it is enabled.
#[inline]
pub const fn from_hex_len(hex_len: usize) -> Option<Self> {
Some(match hex_len {
0..=40 => Kind::Sha1,
0..=SIZE_OF_SHA1_HEX_DIGEST => Kind::Sha1,
#[cfg(feature = "sha256")]
0..=SIZE_OF_SHA256_HEX_DIGEST => Kind::Sha256,
_ => return None,
})
}
Expand All @@ -93,7 +124,9 @@ impl Kind {
#[inline]
pub(crate) fn from_len_in_bytes(bytes: usize) -> Self {
match bytes {
20 => Kind::Sha1,
SIZE_OF_SHA1_DIGEST => Kind::Sha1,
#[cfg(feature = "sha256")]
SIZE_OF_SHA256_DIGEST => Kind::Sha256,
_ => panic!("BUG: must be called only with valid hash lengths produced by len_in_bytes()"),
}
}
Expand All @@ -103,6 +136,8 @@ impl Kind {
pub fn null_ref(&self) -> &'static oid {
match self {
Kind::Sha1 => oid::null_sha1(),
#[cfg(feature = "sha256")]
Kind::Sha256 => oid::null_sha256(),
}
}

Expand All @@ -111,6 +146,8 @@ impl Kind {
pub const fn null(&self) -> ObjectId {
match self {
Kind::Sha1 => ObjectId::null_sha1(),
#[cfg(feature = "sha256")]
Kind::Sha256 => ObjectId::null_sha256(),
}
}

Expand Down
26 changes: 24 additions & 2 deletions gix-hash/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))]
#![deny(missing_docs, rust_2018_idioms, unsafe_code)]

// Remove this once other hashes (e.g., SHA-256, and potentially others)
// Remove this once other hashes (e.g., SHA256, and potentially others)
// are supported, and this crate can build without [`ObjectId::Sha1`].
#[cfg(not(feature = "sha1"))]
compile_error!("Please set the `sha1` feature flag");
Expand Down Expand Up @@ -48,15 +48,37 @@ pub struct Prefix {

/// The size of a SHA1 hash digest in bytes.
const SIZE_OF_SHA1_DIGEST: usize = 20;
/// The size of a SHA1 hash digest in hex.
const SIZE_OF_SHA1_HEX_DIGEST: usize = 2 * SIZE_OF_SHA1_DIGEST;

/// The size of a SHA256 hash digest in bytes.
#[cfg(feature = "sha256")]
const SIZE_OF_SHA256_DIGEST: usize = 32;
/// The size of a SHA256 hash digest in hex.
#[cfg(feature = "sha256")]
const SIZE_OF_SHA256_HEX_DIGEST: usize = 2 * SIZE_OF_SHA256_DIGEST;

const EMPTY_BLOB_SHA1: &[u8; SIZE_OF_SHA1_DIGEST] =
b"\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91";
const EMPTY_TREE_SHA1: &[u8; SIZE_OF_SHA1_DIGEST] =
b"\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04";

#[cfg(feature = "sha256")]
const EMPTY_BLOB_SHA256: &[u8; SIZE_OF_SHA256_DIGEST] = b"\x47\x3a\x0f\x4c\x3b\xe8\xa9\x36\x81\xa2\x67\xe3\xb1\xe9\xa7\xdc\xda\x11\x85\x43\x6f\xe1\x41\xf7\x74\x91\x20\xa3\x03\x72\x18\x13";
#[cfg(feature = "sha256")]
const EMPTY_TREE_SHA256: &[u8; SIZE_OF_SHA256_DIGEST] = b"\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1\x04\xd4\x5d\x8d\x85\xef\xa9\xb0\x57\xb5\x3b\x14\xb4\xb9\xb9\x39\xdd\x74\xde\xcc\x53\x21";

/// Denotes the kind of function to produce an [`ObjectId`].
#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum Kind {
/// The Sha1 hash with 160 bits.
/// The SHA1 hash with 160 bits.
#[default]
Sha1 = 1,
/// The SHA256 hash with 256 bits.
#[cfg(feature = "sha256")]
Sha256 = 2,
}

mod kind;
Loading
Loading