Needle
An application for fast and efficient searches of NGS data.
Loading...
Searching...
No Matches
ibf.h
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/needle/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
8#pragma once
9
10#include <iostream>
11#include <math.h>
12#include <numeric>
13#include <string>
14
15#include <seqan3/alphabet/container/concatenated_sequences.hpp>
16#include <seqan3/alphabet/nucleotide/dna4.hpp>
17#include <filesystem>
18
19#include "shared.h"
20
22{
23 std::filesystem::path include_file; // Needs to be defined when only minimisers appearing in this file should be stored
24 std::filesystem::path exclude_file; // Needs to be defined when minimisers appearing in this file should NOT be stored
25 std::vector<int> samples{}; // Can be used to indicate that sequence files belong to the same experiment
26 bool paired = false; // If true, then experiments are seen as paired-end experiments
27 bool experiment_names = false; // Flag, if names of experiment should be stored in a txt file
28 bool ram_friendly = false; // NOTE(review): undocumented here; presumably selects a lower-memory mode - confirm against ibf.cpp
29};
30
33 int maxi; // Modulus applied to rand() in operator(); set once by the constructor
34 RandomGenerator(int max) :
35 maxi(max) {
36 }
37
// Returns rand() reduced modulo maxi, so for a positive maxi the result lies in [0, maxi).
// NOTE(review): maxi == 0 makes the modulo undefined behaviour - confirm callers never pass 0.
38 int operator()() {
39 return rand() % maxi;
40 }
41};
42
// Get the concrete expression values (= median of all counts of one transcript) for given experiments.
// Takes the minimiser arguments, the sequence files, an include file, a genome file and a paired flag;
// sequence_files and both paths are passed by value.
51void count(min_arguments const & args, std::vector<std::filesystem::path> sequence_files, std::filesystem::path include_file,
52 std::filesystem::path genome_file, bool paired);
53
// Creates a set of minimisers to ignore, which should be used as an input to count.
59void count_genome(min_arguments const & args, std::filesystem::path include_file, std::filesystem::path exclude_file);
60
// Reads a binary file that needle minimiser creates.
// hash_table is a non-const reference; presumably it receives the file's contents - confirm in ibf.cpp.
66void read_binary(std::filesystem::path filename, robin_hood::unordered_node_map<uint64_t, uint16_t> & hash_table);
67
// Reads the beginning of a binary file that needle minimiser creates.
// args, num_of_minimisers and cutoff are non-const references; presumably they are filled from the
// file's leading data - confirm in ibf.cpp.
74void read_binary_start(min_arguments & args, std::filesystem::path filename, uint64_t & num_of_minimisers, uint8_t & cutoff);
75
// Creates IBFs from sequence files.
// expression_by_genome_file defaults to an empty path and num_hash defaults to 1.
// NOTE(review): the meaning of the returned std::vector<uint16_t> is not visible in this header - see ibf.cpp.
88std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & sequence_files, estimate_ibf_arguments & ibf_args,
89 minimiser_arguments & minimiser_args, std::vector<double> & fpr, std::vector<uint8_t> & cutoffs,
90 std::filesystem::path const expression_by_genome_file = "",
91 size_t num_hash = 1);
92
// Overload of ibf that takes minimiser files instead of sequence files; unlike the sequence-file
// overload it has no minimiser_args or cutoffs parameters.
// expression_by_genome_file defaults to an empty path and num_hash defaults to 1.
103std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & minimiser_files,
104 estimate_ibf_arguments & ibf_args, std::vector<double> & fpr,
105 std::filesystem::path const expression_by_genome_file = "",
106 size_t num_hash = 1);
107
// Create minimiser and header files for the given sequence files.
114void minimiser(std::vector<std::filesystem::path> const & sequence_files, min_arguments const & args,
115 minimiser_arguments & minimiser_args, std::vector<uint8_t> & cutoffs);
116
// Insert into IBFs, starting from sequence files.
// NOTE(review): the roles of path_in and samplewise are not visible in this header - see ibf.cpp.
129std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & sequence_files,
130 estimate_ibf_arguments & ibf_args, minimiser_arguments & minimiser_args,
131 std::vector<uint8_t> & cutoffs,
132 std::filesystem::path const expression_by_genome_file, std::filesystem::path path_in, bool samplewise);
133
// Overload of insert that takes minimiser files instead of sequence files; unlike the sequence-file
// overload it has no minimiser_args or cutoffs parameters.
144std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & minimiser_files,
145 estimate_ibf_arguments & ibf_args,
146 std::filesystem::path const expression_by_genome_file, std::filesystem::path path_in, bool samplewise);
147
// Delete bins from IBFs.
// NOTE(review): delete_files holds uint64_t values, presumably the indices of the bins to delete - confirm in ibf.cpp.
155void delete_bin(std::vector<uint64_t> const & delete_files, estimate_ibf_arguments & ibf_args, std::filesystem::path path_in, bool samplewise);
void count_genome(min_arguments const &args, std::filesystem::path include_file, std::filesystem::path exclude_file)
Creates a set of minimisers to ignore, which should be used as an input to count.
Definition ibf.cpp:359
void minimiser(std::vector< std::filesystem::path > const &sequence_files, min_arguments const &args, minimiser_arguments &minimiser_args, std::vector< uint8_t > &cutoffs)
Create minimiser and header files.
Definition ibf.cpp:1489
void count(min_arguments const &args, std::vector< std::filesystem::path > sequence_files, std::filesystem::path include_file, std::filesystem::path genome_file, bool paired)
Get the concrete expression values (= median of all counts of one transcript) for given experiments....
Definition ibf.cpp:403
void delete_bin(std::vector< uint64_t > const &delete_files, estimate_ibf_arguments &ibf_args, std::filesystem::path path_in, bool samplewise)
Delete bins from ibfs.
Definition ibf.cpp:1365
std::vector< uint16_t > insert(std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file, std::filesystem::path path_in, bool samplewise)
Insert into IBFs.
Definition ibf.cpp:1326
void read_binary(std::filesystem::path filename, robin_hood::unordered_node_map< uint64_t, uint16_t > &hash_table)
Reads a binary file that needle minimiser creates.
Definition ibf.cpp:466
std::vector< uint16_t > ibf(std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< double > &fpr, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file="", size_t num_hash=1)
Creates IBFs.
Definition ibf.cpp:956
void read_binary_start(min_arguments &args, std::filesystem::path filename, uint64_t &num_of_minimisers, uint8_t &cutoff)
Reads the beginning of a binary file that needle minimiser creates.
Definition ibf.cpp:498
Generates a pseudo-random integer strictly smaller than a given (positive) maximum, computed as rand() % max.
Definition ibf.h:32
int maxi
Definition ibf.h:33
RandomGenerator(int max)
Definition ibf.h:34
int operator()()
Definition ibf.h:38
arguments used for estimate, ibf, ibfmin
Definition shared.h:41
arguments used for estimate, ibf, minimiser
Definition shared.h:32
Definition ibf.h:22
bool paired
Definition ibf.h:26
std::filesystem::path include_file
Definition ibf.h:23
bool ram_friendly
Definition ibf.h:28
std::vector< int > samples
Definition ibf.h:25
std::filesystem::path exclude_file
Definition ibf.h:24
bool experiment_names
Definition ibf.h:27