Raptor 3.0.0
A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences
index_factory.hpp
Go to the documentation of this file.
1// --------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
6// --------------------------------------------------------------------------------------------------
7
13#pragma once
14
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <variant>

#include <seqan3/search/views/minimiser_hash.hpp>

#include <raptor/index.hpp>
24
25namespace raptor
26{
27
29{
30public:
31 index_factory() = default;
32 index_factory(index_factory const &) = default;
33 index_factory(index_factory &&) = default;
34 index_factory & operator=(index_factory const &) = default;
35 index_factory & operator=(index_factory &&) = default;
36 ~index_factory() = default;
37
38 explicit index_factory(build_arguments const & args) : arguments{std::addressof(args)}
39 {
40 if (arguments->input_is_minimiser)
42 else
43 reader = file_reader<file_types::sequence>{arguments->shape, arguments->window_size};
44 }
45
46 explicit index_factory(build_arguments const & args, partition_config const & cfg) :
47 arguments{std::addressof(args)},
48 config{std::addressof(cfg)}
49 {
50 if (arguments->input_is_minimiser)
51 reader = file_reader<file_types::minimiser>{}; // GCOVR_EXCL_LINE
52 else
53 reader = file_reader<file_types::sequence>{arguments->shape, arguments->window_size};
54 }
55
56 [[nodiscard]] raptor_index<> operator()(size_t const part = 0u) const
57 {
58 return construct(part);
59 }
60
61private:
62 build_arguments const * const arguments{nullptr};
63 partition_config const * const config{nullptr};
65
66 raptor_index<> construct(size_t const part) const
67 {
68 assert(arguments != nullptr);
69
70 arguments->index_allocation_timer.start();
71 raptor_index<> index{*arguments};
72 arguments->index_allocation_timer.stop();
73
74 auto worker = [&](auto && zipped_view, auto &&)
75 {
76 timer<concurrent::no> local_timer{};
77 auto & ibf = index.ibf();
78 local_timer.start();
79 for (auto && [file_names, bin_number] : zipped_view)
80 {
82 [&](auto const & reader)
83 {
84 if (config == nullptr)
85 reader.hash_into(file_names, emplacer(ibf, seqan3::bin_index{bin_number}));
86 else
87 reader.hash_into_if(file_names,
88 emplacer(ibf, seqan3::bin_index{bin_number}),
89 [&](uint64_t const hash)
90 {
91 return config->hash_partition(hash) == part;
92 });
93 },
94 reader);
95 }
96 local_timer.stop();
97 arguments->user_bin_io_timer += local_timer;
98 arguments->fill_ibf_timer += local_timer;
99 };
100
101 call_parallel_on_bins(worker, arguments->bin_path, arguments->threads);
102
103 return index;
104 }
105};
106
107} // namespace raptor
T addressof(T... args)
Provides raptor::adjust_seed.
Provides raptor::call_parallel_on_bins.
Definition: file_reader.hpp:115
Definition: file_reader.hpp:36
Definition: index_factory.hpp:29
Definition: index.hpp:54
Definition: timer.hpp:30
Provides raptor::dna4_traits.
Provides raptor::emplace_iterator.
Provides raptor::file_reader.
Provides raptor::raptor_index.
Provides raptor::partition_config.
Definition: build_arguments.hpp:28
Definition: partition_config.hpp:23
T visit(T... args)