## ----eval=TRUE, message=FALSE, warning=FALSE---------------------------------- # Load packages required for this example library(tigger) library(dplyr) ## ----eval=TRUE, warning=FALSE------------------------------------------------- # Detect novel alleles novel <- findNovelAlleles(AIRRDb, SampleGermlineIGHV, nproc=1) ## ----eval=TRUE, warning=FALSE------------------------------------------------- # Extract and view the rows that contain successful novel allele calls novel_rows <- selectNovel(novel) ## ----eval=TRUE, warning=FALSE, fig.width=6, fig.height=8---------------------- # Plot evidence of the first (and only) novel allele from the example data novel_row <- which(!is.na(novel$polymorphism_call))[1] plotNovel(AIRRDb, novel[novel_row, ]) ## ----eval=TRUE, warning=FALSE, fig.width=4, fig.height=3---------------------- # Infer the individual's genotype, using only unmutated sequences and checking # for the use of the novel alleles inferred in the earlier step. geno <- inferGenotype(AIRRDb, germline_db=SampleGermlineIGHV, novel=novel, find_unmutated=TRUE) # Save the genotype sequences to a vector genotype_db <- genotypeFasta(geno, SampleGermlineIGHV, novel) # Visualize the genotype and sequence counts print(geno) # Make a colorful visualization. Bars indicate presence, not proportion. plotGenotype(geno, text_size = 10) ## ----eval=TRUE, warning=FALSE, fig.width=4, fig.height=3---------------------- # Infer the individual's genotype, using the bayesian method geno_bayesian <- inferGenotypeBayesian(AIRRDb, germline_db=SampleGermlineIGHV, novel=novel, find_unmutated=TRUE) # Visualize the genotype and sequence counts print(geno_bayesian) # Make a colorful visualization. Bars indicate presence, not proportion. plotGenotype(geno_bayesian, text_size=10) ## ----eval=TRUE, warning=FALSE------------------------------------------------- # Use the personlized genotype to determine corrected allele assignments # Updated genotype will be placed in the v_call_genotyped column sample_db <- reassignAlleles(AIRRDb, genotype_db) ## ----eval=TRUE, warning=FALSE------------------------------------------------- # Find the set of alleles in the original calls that were not in the genotype not_in_genotype <- sample_db$v_call %>% strsplit(",") %>% unlist() %>% unique() %>% setdiff(names(genotype_db)) # Determine the fraction of calls that were ambigious before/after correction # and the fraction that contained original calls to non-genotype alleles. Note # that by design, only genotype alleles are allowed in "after" calls. data.frame(Ambiguous=c(mean(grepl(",", sample_db$v_call)), mean(grepl(",", sample_db$v_call_genotyped))), NotInGenotype=c(mean(sample_db$v_call %in% not_in_genotype), mean(sample_db$v_call_genotyped %in% not_in_genotype)), row.names=c("Before", "After")) %>% t() %>% round(3) ## ----eval=TRUE, warning=FALSE------------------------------------------------- evidence <- generateEvidence(sample_db, novel, geno, genotype_db, SampleGermlineIGHV, fields = NULL) evidence %>% select(gene, allele, polymorphism_call, sequences, unmutated_frequency)