vcfpp
Public Member Functions | Public Attributes | Friends | List of all members
vcfpp::BcfRecord Class Reference

Object represents a variant record in the VCF, offering methods to access and modify fields. More...

#include <vcfpp.h>

Public Member Functions

 BcfRecord ()
 empty constructor. call initHeader() afterwards
 
 BcfRecord (BcfHeader &h)
 constructor with a given BcfHeader object
 
void initHeader (BcfHeader &h)
 initialize the header associated with BcfRecord object by pointing to another BcfHeader object
 
void resetHeader (BcfHeader &h)
 reset the header associated with BcfRecord object by pointing to another BcfHeader object
 
std::string asString () const
 return current variant as raw string
 
template<typename T >
isValidGT< T > getGenotypes (T &v)
 fill in the input vector with genotypes of 0 and 1. only works for ploidy<=2. Genotypes with a missing allele are coded as heterozygous More...
 
bool getGenotypes (std::vector< int > &v)
 fill in the input vector with genotype values, 0, 1 or -9 (missing). More...
 
template<typename T , typename S = typename T::value_type>
isFormatVector< T > getFORMAT (std::string tag, T &v)
 get tag value in FORMAT More...
 
bool getFORMAT (std::string tag, std::vector< std::string > &v)
 get tag value in FORMAT More...
 
template<typename T , typename S = typename T::value_type>
isInfoVector< T > getINFO (std::string tag, T &v)
 get tag value in INFO More...
 
template<typename T >
isScalar< T > getINFO (std::string tag, T &v)
 get tag value in INFO More...
 
template<typename T >
isString< T > getINFO (std::string tag, T &v)
 get tag value in INFO More...
 
template<typename T >
isScalar< T > setINFO (std::string tag, const T &v)
 set tag value for INFO More...
 
template<typename T >
isValidInfo< T > setINFO (std::string tag, const T &v)
 set tag value for INFO More...
 
void removeINFO (std::string tag)
 remove the given tag from INFO of the variant
 
bool setGenotypes (const std::vector< int > &v)
 set genotypes from scratch even if genotypes not present More...
 
void setPhasing (const std::vector< char > &v)
 set phasing status for all diploid samples using given vector More...
 
void removeFORMAT (std::string tag)
 remove the given tag from FORMAT of the variant
 
template<typename T >
isValidFMT< T > setFORMAT (std::string tag, const T &v)
 set tag values for all samples in FORMAT using given vector More...
 
template<typename T >
isScalar< T > setFORMAT (std::string tag, const T &v)
 set tag for a single sample in FORMAT using given singular value. this works only when there is one sample in the vcf More...
 
void addLineFromString (const std::string &vcfline)
 add one variant record from given string
 
bool isNoneMissing () const
 if all samples have non missing values for the tag in FORMAT
 
bool isSV () const
 return boolean value indicates if current variant is Structural Variant or not
 
bool isIndel () const
 return boolean value indicates if current variant is exclusively INDEL
 
bool isMultiAllelics () const
 return boolean value indicates if current variant is a multiallelic site
 
bool isMultiAllelicSNP () const
 return boolean value indicates if current variant is exclusively a multiallelic SNP site
 
bool isSNP () const
 return boolean value indicates if current variant is exclusively biallelic SNP. Note ALT=* are skipped
 
bool hasSNP () const
 return boolean value indicates if current variant has SNP type defined in vcf.h (htslib>=1.16)
 
bool hasINDEL () const
 return boolean value indicates if current variant has INDEL type defined in vcf.h (htslib>=1.16)
 
bool hasINS () const
 return boolean value indicates if current variant has INS type defined in vcf.h (htslib>=1.16)
 
bool hasDEL () const
 return boolean value indicates if current variant has DEL type defined in vcf.h (htslib>=1.16)
 
bool hasMNP () const
 return boolean value indicates if current variant has MNP type defined in vcf.h (htslib>=1.16)
 
bool hasBND () const
 return boolean value indicates if current variant has BND type defined in vcf.h (htslib>=1.16)
 
bool hasOTHER () const
 return boolean value indicates if current variant has OTHER type defined in vcf.h (htslib>=1.16)
 
bool hasOVERLAP () const
 return boolean value indicates if current variant has OVERLAP type defined in vcf.h (htslib>=1.16)
 
std::string CHROM () const
 return CHROM name
 
std::string ID () const
 return ID field
 
int64_t POS () const
 return 1-based position
 
void setCHR (const char *chr)
 modify CHROM value
 
void setPOS (int64_t p)
 modify position given 1-based value
 
void setID (const char *s)
 update ID
 
void setRefAlt (const char *alleles_string)
 set REF and ALT alleles given a string separated by comma
 
int64_t Start () const
 return 0-based start of the variant (can be any type)
 
int64_t End () const
 return 0-based end of the variant (can be any type)
 
std::string REF () const
 return raw REF alleles as string
 
void swap_REF_ALT ()
 swap REF and ALT for biallelic SNP
 
std::string ALT () const
 return raw ALT alleles as string
 
float QUAL ()
 return QUAL value
 
std::string FILTER ()
 return raw FILTER column as string
 
std::string allINFO ()
 return raw INFO column as string. recommend to use getINFO for specific tag.
 
bool allPhased () const
 return boolean value indicates if genotypes of all samples are phased
 
int ploidy () const
 return the number of ploidy of current variant
 
void setPloidy (int v)
 in a rare case, one may want to set the number of ploidy manually
 

Public Attributes

std::vector< char > isGenoMissing
 if there is "." in GT for the sample, then it's coded as missing (TRUE)
 
std::vector< char > typeOfGT
 vector of nsamples length. keep track of the type of genotype (one of GT_HOM_RR, GT_HET_RA, GT_HOM_AA, GT_HET_AA, GT_HAPL_R, GT_HAPL_A or GT_UNKN). More...
 
std::vector< char > gtPhase
 vector of nsamples length. keep track of the phasing status of each sample
 

Friends

class BcfReader
 
class BcfWriter
 
std::ostream & operator<< (std::ostream &out, const BcfRecord &v)
 stream out the variant
 

Detailed Description

Object represents a variant record in the VCF, offering methods to access and modify fields.

Note
BcfRecord has to be associated with a BcfHeader object and needs to be filled in by calling BcfReader.getNextVariant function.

Member Function Documentation

◆ getFORMAT() [1/2]

bool vcfpp::BcfRecord::getFORMAT ( std::string  tag,
std::vector< std::string > &  v 
)
inline

get tag value in FORMAT

Parameters
tag: valid tag name in FORMAT column declared in the VCF header
v: vector of string
Returns
bool

◆ getFORMAT() [2/2]

template<typename T , typename S = typename T::value_type>
isFormatVector<T> vcfpp::BcfRecord::getFORMAT ( std::string  tag,
T &  v 
)
inline

get tag value in FORMAT

Parameters
tag: valid tag name in FORMAT column declared in the VCF header
v: valid inputs include vectors of float, char, or int type
Returns
bool

◆ getGenotypes() [1/2]

bool vcfpp::BcfRecord::getGenotypes ( std::vector< int > &  v)
inline

fill in the input vector with genotype values, 0, 1 or -9 (missing).

Parameters
v: valid input is vector<int> type
Returns
bool
Note
This function provides full capability to handle all kinds of genotypes in multi-ploidy data, at the cost of more space than the other function. A missing allele is set to -9.

◆ getGenotypes() [2/2]

template<typename T >
isValidGT<T> vcfpp::BcfRecord::getGenotypes ( T &  v)
inline

fill in the input vector with genotypes of 0 and 1. only works for ploidy<=2. Genotypes with a missing allele are coded as heterozygous

Parameters
v: valid input includes vector<bool> and vector<char> type
Returns
bool
Note
Use isNoneMissing() to check that no genotype is missing. Alternatively, one can use vector<int> as the input type, as noted in the other overloaded function getGenotypes().

◆ getINFO() [1/3]

template<typename T , typename S = typename T::value_type>
isInfoVector<T> vcfpp::BcfRecord::getINFO ( std::string  tag,
T &  v 
)
inline

get tag value in INFO

Parameters
tag: valid tag name in INFO column declared in the VCF header
v: valid inputs include vectors of float or int type
Returns
bool

◆ getINFO() [2/3]

template<typename T >
isScalar<T> vcfpp::BcfRecord::getINFO ( std::string  tag,
T &  v 
)
inline

get tag value in INFO

Parameters
tag: valid tag name in INFO column declared in the VCF header
v: valid inputs include a scalar value of float or int type
Returns
bool

◆ getINFO() [3/3]

template<typename T >
isString<T> vcfpp::BcfRecord::getINFO ( std::string  tag,
T &  v 
)
inline

get tag value in INFO

Parameters
tag: valid tag name in INFO column declared in the VCF header
v: valid input is std::string
Returns
bool

◆ setFORMAT() [1/2]

template<typename T >
isValidFMT<T> vcfpp::BcfRecord::setFORMAT ( std::string  tag,
const T &  v 
)
inline

set tag values for all samples in FORMAT using given vector

Parameters
tag: valid tag name in FORMAT column declared in the VCF header
v: valid inputs include vector<int>, vector<float>, vector<char>, std::string
Returns
bool

◆ setFORMAT() [2/2]

template<typename T >
isScalar<T> vcfpp::BcfRecord::setFORMAT ( std::string  tag,
const T &  v 
)
inline

set tag for a single sample in FORMAT using given singular value. this works only when there is one sample in the vcf

Parameters
tag: valid tag name in FORMAT column declared in the VCF header
v: valid inputs include int, float or double
Returns
bool

◆ setGenotypes()

bool vcfpp::BcfRecord::setGenotypes ( const std::vector< int > &  v)
inline

set genotypes from scratch even if genotypes not present

Parameters
v: the genotypes of vector<int> type
Returns
bool

◆ setINFO() [1/2]

template<typename T >
isScalar<T> vcfpp::BcfRecord::setINFO ( std::string  tag,
const T &  v 
)
inline

set tag value for INFO

Parameters
tag: valid tag name in INFO column declared in the VCF header
v: valid inputs include a scalar value of float or int type
Returns
bool

◆ setINFO() [2/2]

template<typename T >
isValidInfo<T> vcfpp::BcfRecord::setINFO ( std::string  tag,
const T &  v 
)
inline

set tag value for INFO

Parameters
tag: valid tag name in INFO column declared in the VCF header
v: valid inputs include vector<int>, vector<float>, std::string
Returns
bool

◆ setPhasing()

void vcfpp::BcfRecord::setPhasing ( const std::vector< char > &  v)
inline

set phasing status for all diploid samples using given vector

Parameters
v: valid input includes vector<char>

Member Data Documentation

◆ typeOfGT

std::vector<char> vcfpp::BcfRecord::typeOfGT

vector of nsamples length. keep track of the type of genotype (one of GT_HOM_RR, GT_HET_RA, GT_HOM_AA, GT_HET_AA, GT_HAPL_R, GT_HAPL_A or GT_UNKN).

Note
GT_HOM_RR 0
GT_HOM_AA 1
GT_HET_RA 2
GT_HET_AA 3
GT_HAPL_R 4
GT_HAPL_A 5
GT_UNKN 6

The documentation for this class was generated from the following file: