Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deps/libvgio
Submodule libvgio updated 1 files
+16 −1 CMakeLists.txt
32 changes: 32 additions & 0 deletions src/index_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,10 @@ construct_minimizers_impl(
*gbz, distance_index.get(), &oversized_zipcodes, params
);

// Close the distance index so it can't appear to be modified after the
// files that depend on it.
distance_index.reset();

string output_name = plan->output_filepath(minimizer_output);
save_minimizer(minimizers, output_name, IndexingParameters::verbosity == IndexingParameters::Debug);
output_name_minimizer.push_back(output_name);
Expand Down Expand Up @@ -5222,6 +5226,34 @@ vector<string> IndexRegistry::require(const IndexName& identifier) const {
return index->get_filenames();
}

/// Check that no file of the `earlier` index was modified after any file of
/// the `later` index.
///
/// Both indexes are fetched with require(). The newest modification time
/// among the `earlier` files is compared against the oldest modification time
/// among the `later` files, so a single stale file in `later` is enough to
/// make this return false.
///
/// Returns true if the newest `earlier` file is no newer than the oldest
/// `later` file (equal timestamps count as predating), and false otherwise.
///
/// Throws std::runtime_error if either index has no files. Modification times
/// are read with the throwing overload of std::filesystem::last_write_time.
bool IndexRegistry::predates(const IndexName& earlier, const IndexName& later) const {
    // Get all the files
    const std::vector<std::string> earlier_files = require(earlier);
    const std::vector<std::string> later_files = require(later);

    // Make sure they're nonempty, so the min/max below are well defined.
    if (earlier_files.empty()) {
        throw std::runtime_error(earlier + " index has no files");
    }
    if (later_files.empty()) {
        throw std::runtime_error(later + " index has no files");
    }

    // Collect the modification time of every file in a list.
    auto modification_times = [](const std::vector<std::string>& files) {
        std::vector<std::filesystem::file_time_type> times;
        times.reserve(files.size());
        for (const std::string& file : files) {
            times.push_back(std::filesystem::last_write_time(file));
        }
        return times;
    };
    const std::vector<std::filesystem::file_time_type> earlier_times = modification_times(earlier_files);
    const std::vector<std::filesystem::file_time_type> later_times = modification_times(later_files);

    // The two time ranges must not overlap: the boundary values are the
    // newest upstream file and the oldest downstream file. Comparing against
    // the newest downstream file instead would let a stale downstream file
    // slip through whenever one of its siblings happened to be newer.
    const std::filesystem::file_time_type newest_earlier = *std::max_element(earlier_times.begin(), earlier_times.end());
    const std::filesystem::file_time_type oldest_later = *std::min_element(later_times.begin(), later_times.end());

    // Return if the earlier files are touched no later than the later files.
    return newest_earlier <= oldest_later;
}

void IndexRegistry::set_target_memory_usage(int64_t bytes) {
target_memory_usage = bytes;
}
Expand Down
7 changes: 7 additions & 0 deletions src/index_registry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,13 @@ class IndexRegistry {
/// Return true if the given index is available and can be require()'d, and
/// false otherwise.
bool available(const IndexName& identifier) const;

/// For two available indexes, returns true if the modification times
/// on the earlier index are no later than those on the later index.
///
/// Useful for enforcing that downstream indexes haven't had their upstream
/// indexes overwritten.
bool predates(const IndexName& earlier, const IndexName& later) const;

/// Get the possible filename(s) associated with the given index with the given prefix.
/// TODO: Get this to account for sample-scoped indexes.
Expand Down
2 changes: 1 addition & 1 deletion src/io/register_loader_saver_distance_index.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* \file register_loader_saver_distance_index.cpp
* Defines IO for an XG index from stream files.
* Defines IO for a SnarlDistanceIndex from stream files.
*/

#include <vg/io/registry.hpp>
Expand Down
39 changes: 26 additions & 13 deletions src/subcommand/giraffe_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1756,42 +1756,47 @@ int main_giraffe(int argc, char** argv) {
if (show_progress) {
logger.info() << "Loading Minimizer Index" << endl;
}
IndexName minimizer_indexname;
unique_ptr<gbwtgraph::DefaultMinimizerIndex> minimizer_index;
MinimizerIndexParameters::PayloadType payload_type = MinimizerIndexParameters::PAYLOAD_ZIPCODES;
if (map_long_reads) {
if (use_path_minimizer) {
minimizer_index = vg::io::VPKG::load_one<gbwtgraph::DefaultMinimizerIndex>(registry.require("Long Read PathMinimizers").at(0));
minimizer_indexname = "Long Read PathMinimizers";
payload_type = MinimizerIndexParameters::PAYLOAD_ZIPCODES_WITH_PATHS;
} else {
// Use the long read minimizers
minimizer_index = vg::io::VPKG::load_one<gbwtgraph::DefaultMinimizerIndex>(registry.require("Long Read Minimizers").at(0));
minimizer_indexname = "Long Read Minimizers";
}
} else {
minimizer_index = vg::io::VPKG::load_one<gbwtgraph::DefaultMinimizerIndex>(registry.require("Short Read Minimizers").at(0));
minimizer_indexname = "Short Read Minimizers";
}
if (!registry.predates("Giraffe Distance Index", minimizer_indexname)) {
logger.error() << registry.require("Giraffe Distance Index").at(0) << " is newer than " << registry.require(minimizer_indexname).at(0) << " which depends on it" << std::endl;
}
minimizer_index = vg::io::VPKG::load_one<gbwtgraph::DefaultMinimizerIndex>(registry.require(minimizer_indexname).at(0));
require_payload(*minimizer_index, payload_type);

// Grab the zipcodes
if (show_progress) {
logger.info() << "Loading Zipcodes" << endl;
}
IndexName oversized_zipcodes_indexname;
ZipCodeCollection oversized_zipcodes;
if (map_long_reads) {
if (use_path_minimizer) {
ifstream zip_in (registry.require("Long Read PathZipcodes").at(0));
oversized_zipcodes.deserialize(zip_in);
zip_in.close();
oversized_zipcodes_indexname = "Long Read PathZipcodes";
} else {
ifstream zip_in (registry.require("Long Read Zipcodes").at(0));
oversized_zipcodes.deserialize(zip_in);
zip_in.close();
oversized_zipcodes_indexname = "Long Read Zipcodes";
}

} else {
ifstream zip_in (registry.require("Short Read Zipcodes").at(0));
oversized_zipcodes.deserialize(zip_in);
zip_in.close();
oversized_zipcodes_indexname = "Short Read Zipcodes";
}
if (!registry.predates("Giraffe Distance Index", oversized_zipcodes_indexname)) {
logger.error() << registry.require("Giraffe Distance Index").at(0) << " is newer than " << registry.require(oversized_zipcodes_indexname).at(0) << " which depends on it" << std::endl;
}
ifstream zip_in (registry.require(oversized_zipcodes_indexname).at(0));
oversized_zipcodes.deserialize(zip_in);
zip_in.close();


// Grab the GBZ
Expand All @@ -1805,6 +1810,14 @@ int main_giraffe(int argc, char** argv) {
if (show_progress) {
logger.info() << "Loading Distance Index" << endl;
}
// TODO: Now that we enforce that the minimizer and zipcodes files are
// newer than the distance index, we really shouldn't modify it ourselves
// by fixing any indirect pointers that may still be in it. So we should be
// able to open the file read-only and map the file read-only here, which
// in turn would solve problems with writable mappings being slow on shared
// filesystems even when not being written. But the VPKG system doesn't
// really support doing that, so we'd have to get the file descriptor
// manually and deserialize() on it and close() it later.
auto distance_index = vg::io::VPKG::load_one<SnarlDistanceIndex>(registry.require("Giraffe Distance Index").at(0));

if (show_progress) {
Expand Down
7 changes: 5 additions & 2 deletions src/subcommand/minimizer_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,16 @@ int main_minimizer(int argc, char** argv) {
config.params
);

// Serialize the index and the oversized zipcodes.
// Close the distance index so it can't seem to be modified after the files
// that depend on it.
distance_index.reset();

// Serialize the minimizer index and the oversized zipcodes.
save_minimizer(index, config.output_name);
if (!config.zipcode_name.empty()) {
std::ofstream zip_out(config.zipcode_name);
oversized_zipcodes.serialize(zip_out);
zip_out.close();

}

if (config.progress) {
Expand Down
Loading