1315 lines
46 KiB
Go
1315 lines
46 KiB
Go
|
|
// createPersonGeneticAnalysis provides functions to create a Person genetic analysis
|
|
// These analyses are performed on one or more genome files.
|
|
// They contain 3 categories of results: Monogenic Diseases, Polygenic Diseases and Traits
|
|
// Use createCoupleGeneticAnalysis.go to create Couple genetic analyses
|
|
|
|
package createPersonGeneticAnalysis
|
|
|
|
// Disclaimer: I am a novice in the ways of genetics. This package could be flawed in numerous ways.
|
|
|
|
// TODO: We want to eventually use neural nets for both trait and polygenic disease analysis (see geneticPrediction.go)
|
|
// These will be trained on a set of genomes and will output a probability analysis for each trait/disease
|
|
// This is only possible once we get access to the necessary training data
|
|
|
|
// TODO: Add the ability to weight different genome files based on their reliability.
|
|
// Some files are much more accurate because they record each location many times.
|
|
|
|
import "seekia/resources/geneticReferences/locusMetadata"
|
|
import "seekia/resources/geneticReferences/monogenicDiseases"
|
|
import "seekia/resources/geneticReferences/polygenicDiseases"
|
|
import "seekia/resources/geneticReferences/traits"
|
|
|
|
import "seekia/internal/encoding"
|
|
import "seekia/internal/genetics/geneticAnalysis"
|
|
import "seekia/internal/genetics/geneticPrediction"
|
|
import "seekia/internal/genetics/locusValue"
|
|
import "seekia/internal/genetics/prepareRawGenomes"
|
|
import "seekia/internal/helpers"
|
|
|
|
import "errors"
|
|
import "slices"
|
|
import "reflect"
|
|
|
|
|
|
//Outputs:
|
|
// -bool: Process completed (it was not stopped manually before completion)
|
|
// -string: New Genetic analysis string (Encoded in MessagePack)
|
|
// -error
|
|
func CreatePersonGeneticAnalysis(genomesList []prepareRawGenomes.RawGenomeWithMetadata, updatePercentageCompleteFunction func(int)error, checkIfProcessIsStopped func()bool)(bool, string, error){
|
|
|
|
prepareRawGenomesUpdatePercentageCompleteFunction := func(newPercentage int)error{
|
|
|
|
newPercentageCompletion, err := helpers.ScaleNumberProportionally(true, newPercentage, 0, 100, 0, 50)
|
|
if (err != nil){ return err }
|
|
|
|
err = updatePercentageCompleteFunction(newPercentageCompletion)
|
|
if (err != nil) { return err }
|
|
|
|
return nil
|
|
}
|
|
|
|
genomesWithMetadataList, allRawGenomeIdentifiersList, multipleGenomesExist, onlyExcludeConflictsGenomeIdentifier, onlyIncludeSharedGenomeIdentifier, err := prepareRawGenomes.GetGenomesWithMetadataListFromRawGenomesList(genomesList, prepareRawGenomesUpdatePercentageCompleteFunction)
|
|
if (err != nil) { return false, "", err }
|
|
|
|
// This map stores each genome's locus values
|
|
// Map Structure: Genome Identifier -> Genome locus values map (rsID -> Locus Value)
|
|
genomesMap := make(map[[16]byte]map[int64]locusValue.LocusValue)
|
|
|
|
for _, genomeWithMetadata := range genomesWithMetadataList{
|
|
|
|
genomeIdentifier := genomeWithMetadata.GenomeIdentifier
|
|
genomeMap := genomeWithMetadata.GenomeMap
|
|
|
|
genomesMap[genomeIdentifier] = genomeMap
|
|
}
|
|
|
|
newGeneticAnalysisObject := geneticAnalysis.PersonAnalysis{
|
|
AnalysisVersion: 1,
|
|
CombinedGenomesExist: multipleGenomesExist,
|
|
AllRawGenomeIdentifiersList: allRawGenomeIdentifiersList,
|
|
GenomesMap: genomesMap,
|
|
}
|
|
|
|
if (multipleGenomesExist == true){
|
|
|
|
newGeneticAnalysisObject.OnlyExcludeConflictsGenomeIdentifier = onlyExcludeConflictsGenomeIdentifier
|
|
newGeneticAnalysisObject.OnlyIncludeSharedGenomeIdentifier = onlyIncludeSharedGenomeIdentifier
|
|
}
|
|
|
|
processIsStopped := checkIfProcessIsStopped()
|
|
if (processIsStopped == true){
|
|
return false, "", nil
|
|
}
|
|
|
|
monogenicDiseasesList, err := monogenicDiseases.GetMonogenicDiseaseObjectsList()
|
|
if (err != nil) { return false, "", err }
|
|
|
|
// Map Structure: Disease Name -> PersonMonogenicDiseaseInfo
|
|
analysisMonogenicDiseasesMap := make(map[string]geneticAnalysis.PersonMonogenicDiseaseInfo)
|
|
|
|
for _, monogenicDiseaseObject := range monogenicDiseasesList{
|
|
|
|
diseaseName := monogenicDiseaseObject.DiseaseName
|
|
|
|
personDiseaseAnalysisObject, err := GetPersonMonogenicDiseaseAnalysis(genomesWithMetadataList, monogenicDiseaseObject)
|
|
if (err != nil) { return false, "", err }
|
|
|
|
analysisMonogenicDiseasesMap[diseaseName] = personDiseaseAnalysisObject
|
|
}
|
|
|
|
newGeneticAnalysisObject.MonogenicDiseasesMap = analysisMonogenicDiseasesMap
|
|
|
|
polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList()
|
|
if (err != nil) { return false, "", err }
|
|
|
|
// Map Structure: Disease Name -> PersonPolygenicDiseaseInfo
|
|
analysisPolygenicDiseasesMap := make(map[string]geneticAnalysis.PersonPolygenicDiseaseInfo)
|
|
|
|
for _, diseaseObject := range polygenicDiseaseObjectsList{
|
|
|
|
personDiseaseAnalysisObject, err := GetPersonPolygenicDiseaseAnalysis(genomesWithMetadataList, diseaseObject)
|
|
if (err != nil) { return false, "", err }
|
|
|
|
diseaseName := diseaseObject.DiseaseName
|
|
|
|
analysisPolygenicDiseasesMap[diseaseName] = personDiseaseAnalysisObject
|
|
}
|
|
|
|
newGeneticAnalysisObject.PolygenicDiseasesMap = analysisPolygenicDiseasesMap
|
|
|
|
traitObjectsList, err := traits.GetTraitObjectsList()
|
|
if (err != nil) { return false, "", err }
|
|
|
|
// This map will always contain an entry for each discrete trait
|
|
// Map Structure: Trait Name -> PersonDiscreteTraitInfo
|
|
analysisDiscreteTraitsMap := make(map[string]geneticAnalysis.PersonDiscreteTraitInfo)
|
|
|
|
// This map will not contain entries for traits which this person's genome has no known loci
|
|
// Map Structure: Trait Name -> PersonNumericTraitInfo
|
|
analysisNumericTraitsMap := make(map[string]geneticAnalysis.PersonNumericTraitInfo)
|
|
|
|
for _, traitObject := range traitObjectsList{
|
|
|
|
traitName := traitObject.TraitName
|
|
traitIsDiscreteOrNumeric := traitObject.DiscreteOrNumeric
|
|
|
|
if (traitIsDiscreteOrNumeric == "Discrete"){
|
|
|
|
personTraitAnalysisObject, err := GetPersonDiscreteTraitAnalysis(genomesWithMetadataList, traitObject)
|
|
if (err != nil) { return false, "", err }
|
|
|
|
analysisDiscreteTraitsMap[traitName] = personTraitAnalysisObject
|
|
}
|
|
}
|
|
|
|
newGeneticAnalysisObject.DiscreteTraitsMap = analysisDiscreteTraitsMap
|
|
newGeneticAnalysisObject.NumericTraitsMap = analysisNumericTraitsMap
|
|
|
|
analysisBytes, err := encoding.EncodeMessagePackBytes(newGeneticAnalysisObject)
|
|
if (err != nil) { return false, "", err }
|
|
|
|
analysisString := string(analysisBytes)
|
|
|
|
return true, analysisString, nil
|
|
}
|
|
|
|
|
|
//Outputs:
|
|
// -geneticAnalysis.PersonMonogenicDiseaseInfo: Monogenic disease analysis object
|
|
// -error
|
|
func GetPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, diseaseObject monogenicDiseases.MonogenicDisease)(geneticAnalysis.PersonMonogenicDiseaseInfo, error){
|
|
|
|
emptyDiseaseInfoObject := geneticAnalysis.PersonMonogenicDiseaseInfo{}
|
|
|
|
dominantOrRecessive := diseaseObject.DominantOrRecessive
|
|
variantsList := diseaseObject.VariantsList
|
|
|
|
// We use this map to keep track of which RSIDs corresponds to each variant
|
|
// We also use it to have a map of all variants for the disease
|
|
// Map Structure: Variant Identifier -> []rsID
|
|
variantRSIDsMap := make(map[[3]byte][]int64)
|
|
|
|
// This map stores all rsIDs for this monogenic disease
|
|
// These are locations in the disease's gene which, if mutated, are known to cause the disease
|
|
// We use this map to avoid duplicate rsIDs, because one rsID can have multiple variants which belong to it
|
|
// We also store all alias rsIDs in this map
|
|
allRSIDsMap := make(map[int64]struct{})
|
|
|
|
for _, variantObject := range variantsList{
|
|
|
|
variantIdentifierHex := variantObject.VariantIdentifier
|
|
|
|
variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex)
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
|
|
variantRSID := variantObject.VariantRSID
|
|
|
|
variantRSIDsList := []int64{variantRSID}
|
|
|
|
// We add aliases to variantRSIDsList
|
|
|
|
anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(variantRSID)
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
if (anyAliasesExist == true){
|
|
variantRSIDsList = append(variantRSIDsList, rsidAliasesList...)
|
|
}
|
|
|
|
variantRSIDsMap[variantIdentifier] = variantRSIDsList
|
|
|
|
for _, rsID := range variantRSIDsList{
|
|
allRSIDsMap[rsID] = struct{}{}
|
|
}
|
|
}
|
|
|
|
// Now we create a new map without any rsID aliases
|
|
// Each rsID in this map represents a unique locus on the genome
|
|
// Each rsID may have aliases, but they are not included in this map
|
|
allUniqueRSIDsMap := make(map[int64]struct{})
|
|
|
|
for rsID, _ := range allRSIDsMap{
|
|
|
|
anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(rsID)
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
if (anyAliasesExist == false){
|
|
allUniqueRSIDsMap[rsID] = struct{}{}
|
|
continue
|
|
}
|
|
|
|
// We see if we already added an alias of this rsID to the map
|
|
|
|
checkIfAliasAlreadyExists := func()bool{
|
|
|
|
for _, rsIDAlias := range rsidAliasesList{
|
|
_, exists := allUniqueRSIDsMap[rsIDAlias]
|
|
if (exists == true){
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
aliasAlreadyExists := checkIfAliasAlreadyExists()
|
|
if (aliasAlreadyExists == true){
|
|
// We already added this alias
|
|
continue
|
|
}
|
|
allUniqueRSIDsMap[rsID] = struct{}{}
|
|
}
|
|
|
|
// Map Structure: Genome Identifier -> PersonGenomeMonogenicDiseaseInfo
|
|
monogenicDiseaseInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomeMonogenicDiseaseInfo)
|
|
|
|
for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{
|
|
|
|
genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier
|
|
genomeMap := genomeWithMetadataObject.GenomeMap
|
|
|
|
// This stores all variant info for this genome
|
|
// Map Structure: Variant Identifier -> PersonGenomeMonogenicDiseaseVariantInfo
|
|
variantsInfoMap := make(map[[3]byte]geneticAnalysis.PersonGenomeMonogenicDiseaseVariantInfo)
|
|
|
|
for _, variantObject := range variantsList{
|
|
|
|
variantIdentifierHex := variantObject.VariantIdentifier
|
|
|
|
variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex)
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
|
|
variantRSID := variantObject.VariantRSID
|
|
|
|
basePairValueFound, base1Value, base2Value, locusIsPhased, _, err := GetLocusValueFromGenomeMap(true, genomeMap, variantRSID)
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
if (basePairValueFound == false){
|
|
|
|
// This genome does not contain info for this variant
|
|
// We skip it
|
|
continue
|
|
}
|
|
|
|
// This genome has at least 1 variant
|
|
|
|
variantDefectiveBase := variantObject.DefectiveBase
|
|
|
|
getBaseIsVariantMutationBool := func(inputBase string)bool{
|
|
|
|
if (inputBase == variantDefectiveBase){
|
|
return true
|
|
}
|
|
// Base could be mutated to a different unhealthy base
|
|
// That mutation could be a neutral/healthier change
|
|
// We only care about this specific variant
|
|
return false
|
|
}
|
|
|
|
base1IsDefective := getBaseIsVariantMutationBool(base1Value)
|
|
base2IsDefective := getBaseIsVariantMutationBool(base2Value)
|
|
|
|
newDiseaseVariantInfoObject := geneticAnalysis.PersonGenomeMonogenicDiseaseVariantInfo{
|
|
Base1HasVariant: base1IsDefective,
|
|
Base2HasVariant: base2IsDefective,
|
|
LocusIsPhased: locusIsPhased,
|
|
}
|
|
|
|
variantsInfoMap[variantIdentifier] = newDiseaseVariantInfoObject
|
|
|
|
//TODO: Add LocusIsPhased to readGeneticAnalysis package
|
|
}
|
|
|
|
// We are done adding variant information for the genome
|
|
// Now we determine probability that user will pass a disease variant to offspring, and if the user has the disease
|
|
|
|
numberOfVariantsTested := len(variantsInfoMap)
|
|
|
|
if (numberOfVariantsTested == 0){
|
|
// We don't know anything about this genome's disease risk for this disease
|
|
// We won't add any information to the map
|
|
continue
|
|
}
|
|
|
|
// This stores the number of loci that were tested
|
|
// Each locus can have multiple potential variants
|
|
numberOfLociTested := 0
|
|
|
|
// This stores the number of tested loci that are phased
|
|
// A higher number means that the results are more potentially more accurate
|
|
// It is only more accurate if multiple heterozygous variants on seperate loci exist.
|
|
numberOfPhasedLoci := 0
|
|
|
|
for rsID, _ := range allUniqueRSIDsMap{
|
|
|
|
locusValueExists, _, _, locusIsPhased, _, err := GetLocusValueFromGenomeMap(true, genomeMap, rsID)
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
if (locusValueExists == false){
|
|
continue
|
|
}
|
|
|
|
numberOfLociTested += 1
|
|
|
|
if (locusIsPhased == true){
|
|
numberOfPhasedLoci += 1
|
|
}
|
|
}
|
|
|
|
// Outputs:
|
|
// -bool: Person has disease
|
|
// -float64: Probability Person will pass a defect (variant) to offspring (0-1)
|
|
// -error
|
|
getPersonDiseaseInfo := func()(bool, float64, error){
|
|
|
|
// These variables are used to count the number of defective variants that exist on each chromosome
|
|
numberOfVariants_Chromosome1 := 0
|
|
numberOfVariants_Chromosome2 := 0
|
|
numberOfVariants_UnknownChromosome := 0
|
|
|
|
// We use this map to keep track of how many mutations exist for each rsID
|
|
// This allows us to know if 2 different variant mutations exist for a single rsID
|
|
// For example, base1 is a different deleterious mutation than base2
|
|
// If this ever happens, we know that the user has the disease,
|
|
// because both copies of the gene locus are defective.
|
|
rsidMutationsMap := make(map[int64]int)
|
|
|
|
for variantIdentifier, variantInfoObject := range variantsInfoMap{
|
|
|
|
locusIsPhasedStatus := variantInfoObject.LocusIsPhased
|
|
|
|
base1HasVariant := variantInfoObject.Base1HasVariant
|
|
base2HasVariant := variantInfoObject.Base2HasVariant
|
|
|
|
if (base1HasVariant == false && base2HasVariant == false){
|
|
// Neither chromosome contains the variant mutation.
|
|
continue
|
|
}
|
|
|
|
if (base1HasVariant == true && base2HasVariant == true){
|
|
// Both chromosomes contain the same variant mutation.
|
|
// Person has the disease.
|
|
// Person will definitely pass disease variant to offspring.
|
|
return true, 1, nil
|
|
}
|
|
|
|
// We know that this variant exists on 1 of the bases, but not both.
|
|
|
|
variantRSIDsList, exists := variantRSIDsMap[variantIdentifier]
|
|
if (exists == false){
|
|
return false, 0, errors.New("variantRSIDsMap missing variantIdentifier.")
|
|
}
|
|
|
|
for _, rsID := range variantRSIDsList{
|
|
rsidMutationsMap[rsID] += 1
|
|
}
|
|
|
|
if (locusIsPhasedStatus == true){
|
|
|
|
if (base1HasVariant == true){
|
|
numberOfVariants_Chromosome1 += 1
|
|
}
|
|
if (base2HasVariant == true){
|
|
numberOfVariants_Chromosome2 += 1
|
|
}
|
|
} else {
|
|
|
|
if (base1HasVariant == true || base2HasVariant == true){
|
|
numberOfVariants_UnknownChromosome += 1
|
|
}
|
|
}
|
|
}
|
|
|
|
totalNumberOfVariants := numberOfVariants_Chromosome1 + numberOfVariants_Chromosome2 + numberOfVariants_UnknownChromosome
|
|
|
|
if (totalNumberOfVariants == 0){
|
|
// Person does not have any disease variants.
|
|
// They do not have the disease, and have no chance of passing a disease variant
|
|
return false, 0, nil
|
|
}
|
|
|
|
// Now we check to see if there are any loci which have 2 different variants, one for each base
|
|
|
|
for _, numberOfMutations := range rsidMutationsMap{
|
|
|
|
if (numberOfMutations >= 2){
|
|
// Person has 2 mutations on the same location
|
|
// They must have the disease, and will definitely pass a variant to their offspring
|
|
return true, 1, nil
|
|
}
|
|
}
|
|
|
|
// At this point, we know that there are no homozygous variant mutations
|
|
// All variant mutations are heterozygous, meaning the other chromosome strand's base is healthy
|
|
|
|
//Outputs:
|
|
// -bool: Person has disease
|
|
getPersonHasDiseaseBool := func()bool{
|
|
|
|
if (dominantOrRecessive == "Dominant"){
|
|
// Only 1 variant is needed for the person to have the disease
|
|
// We know they have at least 1 variant
|
|
return true
|
|
}
|
|
|
|
// dominantOrRecessive == "Recessive"
|
|
|
|
if (totalNumberOfVariants == 1){
|
|
// There is only 1 variant in total.
|
|
// This single variant cannot exist on both chromosomes.
|
|
// The person does not have the disease
|
|
return false
|
|
}
|
|
|
|
// We know that there are at least 2 variants
|
|
|
|
if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){
|
|
|
|
// We know there is at least 1 variant mutation on each chromosome
|
|
// Therefore, the person has the disease
|
|
return true
|
|
}
|
|
|
|
if (numberOfVariants_UnknownChromosome == 0){
|
|
|
|
// We know that variants do not exist on both chromosomes, only on 1.
|
|
// Thus, the person does not have the disease
|
|
return false
|
|
}
|
|
|
|
// We know there are at least 2 variants
|
|
// We know there is at least 1 variant whose phase is unknown
|
|
|
|
// If all mutations are on the same chromosome, the person does not have the disease.
|
|
// If at least 1 mutation exists on each chromosome, the person does have the disease.
|
|
// Either way, we don't know enough to say if the person has the disease.
|
|
// We will report that they do not, because their genome does not conclusively say that they do.
|
|
// This is why phased genomes are useful and provide a more accurate reading
|
|
// TODO: Explain this to the user in the GUI
|
|
// We must explain that unphased genomes will not detect disease sometimes
|
|
|
|
return false
|
|
}
|
|
|
|
personHasDiseaseBool := getPersonHasDiseaseBool()
|
|
|
|
// Output:
|
|
// -float64: Probability person will pass a disease variant to their offspring (0-1)
|
|
getPersonWillPassVariantProbability := func()float64{
|
|
|
|
if (totalNumberOfVariants == 1){
|
|
|
|
// There is only 1 variant on any chromosome
|
|
// The probability of the person passing a variant is 50%.
|
|
return 0.5
|
|
}
|
|
|
|
// We know that there are at least 2 variants
|
|
|
|
if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){
|
|
|
|
// We know there is at least 1 variant mutation on each chromosome
|
|
// Therefore, the person will definitely pass a variant
|
|
return 1
|
|
}
|
|
if (numberOfVariants_UnknownChromosome == 0){
|
|
|
|
// We know that variants do not exist on both chromosomes, only on 1.
|
|
// Thus, the person has a 50% probability of passing a variant
|
|
return 0.5
|
|
}
|
|
|
|
// We know all variants are heterozygous
|
|
|
|
// From Wikipeia:
|
|
// The human genome contains somewhere between 19,000 and 20,000 protein-coding genes.
|
|
// These genes contain an average of 10 introns and the average size of an intron is about 6 kb (6,000 base pairs)
|
|
// This means that the average size of a protein-coding gene is about 62 kb (62,000 base pairs)
|
|
|
|
// The probability of a recombination breakpoint occurring within the gene is very small
|
|
// If there is 1 breakpoint every 100 million locations, on average, and each gene is 62,000 base pairs long,
|
|
// then the probability of a breakpoint occurring within a gene is 62,000/100,000,000 = 0.00062 = .062%
|
|
// Thus, we disregard the risk of a breakpoint occurring within a gene
|
|
// I also read somewhere that breakpoints are less likely to occurr within genes, which makes this likelihood even smaller
|
|
|
|
// At this point, we know there at at least 2 variants
|
|
// We know that at least 1 of the variants has an unknown phase
|
|
// We don't know if all of the variants belong to the same chromosome
|
|
// If variants exist on both chromosomes, then the probability of passing a variant is 100%
|
|
// If all variants exist on the same chromosome, then the probability of passing a variant is 50%
|
|
// We know there is at least a 50% chance of passing a variant, and possibly higher
|
|
|
|
return 0.5
|
|
}
|
|
|
|
personWillPassVariantProbability := getPersonWillPassVariantProbability()
|
|
|
|
return personHasDiseaseBool, personWillPassVariantProbability, nil
|
|
}
|
|
|
|
personHasDisease, probabilityPersonWillPassAnyVariant, err := getPersonDiseaseInfo()
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
|
|
percentageProbabilityPersonWillPassADiseaseVariant := int(probabilityPersonWillPassAnyVariant * 100)
|
|
|
|
diseaseAnalysisObject := geneticAnalysis.PersonGenomeMonogenicDiseaseInfo{
|
|
PersonHasDisease: personHasDisease,
|
|
QuantityOfVariantsTested: numberOfVariantsTested,
|
|
QuantityOfLociTested: numberOfLociTested,
|
|
QuantityOfPhasedLoci: numberOfPhasedLoci,
|
|
ProbabilityOfPassingADiseaseVariant: percentageProbabilityPersonWillPassADiseaseVariant,
|
|
VariantsInfoMap: variantsInfoMap,
|
|
}
|
|
|
|
monogenicDiseaseInfoMap[genomeIdentifier] = diseaseAnalysisObject
|
|
}
|
|
|
|
personMonogenicDiseaseInfoObject := geneticAnalysis.PersonMonogenicDiseaseInfo{
|
|
MonogenicDiseaseInfoMap: monogenicDiseaseInfoMap,
|
|
}
|
|
|
|
if (len(monogenicDiseaseInfoMap) <= 1){
|
|
// We do not need to check for conflicts, there is only <=1 genome with disease information
|
|
// Nothing left to do. Analysis is complete.
|
|
return personMonogenicDiseaseInfoObject, nil
|
|
}
|
|
|
|
// We check for conflicts
|
|
|
|
getConflictExistsBool := func()(bool, error){
|
|
|
|
firstItemReached := false
|
|
|
|
personHasDisease := false
|
|
probabilityOfPassingAVariant := 0
|
|
|
|
for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{
|
|
|
|
currentGenomePersonHasDisease := currentGenomeDiseaseAnalysisObject.PersonHasDisease
|
|
currentGenomeProbabilityOfPassingAVariant := currentGenomeDiseaseAnalysisObject.ProbabilityOfPassingADiseaseVariant
|
|
|
|
if (firstItemReached == false){
|
|
personHasDisease = currentGenomePersonHasDisease
|
|
probabilityOfPassingAVariant = currentGenomeProbabilityOfPassingAVariant
|
|
firstItemReached = true
|
|
continue
|
|
}
|
|
|
|
if (currentGenomePersonHasDisease != personHasDisease){
|
|
return true, nil
|
|
}
|
|
if (currentGenomeProbabilityOfPassingAVariant != probabilityOfPassingAVariant){
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
// Now we test variants for conflicts
|
|
// We are only doing this to see if there are variants which one genome has and another doesn't
|
|
// For example, the analysis results say that you have a 50% chance of passing a variant for both genomes, but
|
|
// they have detected a different variant for each genome.
|
|
// This means that your real risk of passing a variant may actually be higher, and you are more likely to have the disease too
|
|
|
|
for variantIdentifier, _ := range variantRSIDsMap{
|
|
|
|
// Each variant base pair is either true/false, true/true, false/false, false/true
|
|
|
|
// Two different genomes have true/false and false/true, it will not count as a conflict
|
|
// If the locus is unphased, then there is no difference between true/false and false/true
|
|
// If the locus is phased, then this flip is only meaningful if it effects the probability of disease/passing a variant
|
|
// We already checked those probabilities for conflicts earlier
|
|
// Therefore, any flip is not considered a conflict
|
|
// We only care about conflicts where 1 genome says you have a variant and the other says you don't, or
|
|
// one says you have only 1 mutation and the other says you have 2 at that location
|
|
|
|
firstItemReached := false
|
|
|
|
base1HasVariant := false
|
|
base2HasVariant := false
|
|
|
|
for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{
|
|
|
|
variantsInfoMap := currentGenomeDiseaseAnalysisObject.VariantsInfoMap
|
|
|
|
variantInfoObject, exists := variantsInfoMap[variantIdentifier]
|
|
if (exists == false){
|
|
if (firstItemReached == true){
|
|
// A previous genome has information for this variant, and the current one does not
|
|
return true, nil
|
|
}
|
|
continue
|
|
}
|
|
|
|
currentBase1HasVariant := variantInfoObject.Base1HasVariant
|
|
currentBase2HasVariant := variantInfoObject.Base2HasVariant
|
|
|
|
if (firstItemReached == false){
|
|
base1HasVariant = currentBase1HasVariant
|
|
base2HasVariant = currentBase2HasVariant
|
|
firstItemReached = true
|
|
continue
|
|
}
|
|
|
|
if (base1HasVariant == currentBase1HasVariant && base2HasVariant == currentBase2HasVariant){
|
|
// No conflict exists
|
|
continue
|
|
}
|
|
if (base1HasVariant == currentBase2HasVariant && base2HasVariant == currentBase1HasVariant){
|
|
// We don't count this as a conflict
|
|
continue
|
|
}
|
|
|
|
// A conflict exists
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
conflictExists, err := getConflictExistsBool()
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
|
|
personMonogenicDiseaseInfoObject.ConflictExists = conflictExists
|
|
|
|
return personMonogenicDiseaseInfoObject, nil
|
|
}
|
|
|
|
|
|
//Outputs:
|
|
// -bool: Any loci tested
|
|
// -int: Person genome risk score (value between 0-10)
|
|
// -int: Person Genome Number of loci tested
|
|
// -map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo: Person disease locus info map
|
|
// Map Structure: Locus Identifier -> PersonGenomePolygenicDiseaseLocusInfo
|
|
// -error
|
|
func GetPersonGenomePolygenicDiseaseInfo(diseaseLociList []polygenicDiseases.DiseaseLocus, personLocusValuesMap map[int64]locusValue.LocusValue, lookForLocusAliases bool)(bool, int, int, map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo, error){
|
|
|
|
if (len(personLocusValuesMap) == 0){
|
|
return false, 0, 0, nil, nil
|
|
}
|
|
|
|
// Map Structure: Locus Identifier -> PersonGenomePolygenicDiseaseLocusInfo
|
|
genomeLociInfoMap := make(map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo)
|
|
|
|
summedDiseaseRiskWeight := 0
|
|
|
|
minimumPossibleRiskWeightSum := 0
|
|
maximumPossibleRiskWeightSum := 0
|
|
|
|
for _, locusObject := range diseaseLociList{
|
|
|
|
locusRSID := locusObject.LocusRSID
|
|
locusRiskWeightsMap := locusObject.RiskWeightsMap
|
|
locusOddsRatiosMap := locusObject.OddsRatiosMap
|
|
locusMinimumWeight := locusObject.MinimumRiskWeight
|
|
locusMaximumWeight := locusObject.MaximumRiskWeight
|
|
|
|
locusValueFound, locusBase1Value, locusBase2Value, _, _, err := GetLocusValueFromGenomeMap(lookForLocusAliases, personLocusValuesMap, locusRSID)
|
|
if (err != nil) { return false, 0, 0, nil, err }
|
|
if (locusValueFound == false){
|
|
continue
|
|
}
|
|
|
|
locusRiskWeight, locusOddsRatioIsKnown, locusOddsRatio, err := GetGenomePolygenicDiseaseLocusRiskInfo(locusRiskWeightsMap, locusOddsRatiosMap, locusBase1Value, locusBase2Value)
|
|
if (err != nil) { return false, 0, 0, nil, err }
|
|
|
|
newLocusInfoObject := geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo{
|
|
RiskWeight: locusRiskWeight,
|
|
OddsRatioIsKnown: locusOddsRatioIsKnown,
|
|
}
|
|
|
|
if (locusOddsRatioIsKnown == true){
|
|
newLocusInfoObject.OddsRatio = locusOddsRatio
|
|
}
|
|
|
|
locusIdentifierHex := locusObject.LocusIdentifier
|
|
|
|
locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex)
|
|
if (err != nil) { return false, 0, 0, nil, err }
|
|
|
|
genomeLociInfoMap[locusIdentifier] = newLocusInfoObject
|
|
|
|
minimumPossibleRiskWeightSum += locusMinimumWeight
|
|
maximumPossibleRiskWeightSum += locusMaximumWeight
|
|
|
|
summedDiseaseRiskWeight += locusRiskWeight
|
|
}
|
|
|
|
numberOfLociTested := len(genomeLociInfoMap)
|
|
if (numberOfLociTested == 0){
|
|
// We have no information about this disease for this genome
|
|
return false, 0, 0, nil, nil
|
|
}
|
|
|
|
diseaseRiskScore, err := helpers.ScaleNumberProportionally(true, summedDiseaseRiskWeight, minimumPossibleRiskWeightSum, maximumPossibleRiskWeightSum, 0, 10)
|
|
if (err != nil) { return false, 0, 0, nil, err }
|
|
|
|
return true, diseaseRiskScore, numberOfLociTested, genomeLociInfoMap, nil
|
|
}
|
|
|
|
|
|
//Outputs:
|
|
// -geneticAnalysis.PersonPolygenicDiseaseInfo
|
|
// -error
|
|
func GetPersonPolygenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, diseaseObject polygenicDiseases.PolygenicDisease)(geneticAnalysis.PersonPolygenicDiseaseInfo, error){
|
|
|
|
// We use this when returning errors
|
|
emptyDiseaseInfoObject := geneticAnalysis.PersonPolygenicDiseaseInfo{}
|
|
|
|
diseaseLociList := diseaseObject.LociList
|
|
|
|
// This map stores the polygenic disease for each of the person's genomes
|
|
// Map Structure: Genome Identifier -> PersonGenomePolygenicDiseaseInfo
|
|
personPolygenicDiseaseInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomePolygenicDiseaseInfo)
|
|
|
|
// We construct polygenic disease probability info for each genome
|
|
|
|
for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{
|
|
|
|
genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier
|
|
genomeMap := genomeWithMetadataObject.GenomeMap
|
|
|
|
// This map stores the loci for this disease and does not contain loci which do not belong to this disease
|
|
// Map Structure: rsID -> Locus Value
|
|
genomeLocusValuesMap := make(map[int64]locusValue.LocusValue)
|
|
|
|
for _, locusObject := range diseaseLociList{
|
|
|
|
locusRSID := locusObject.LocusRSID
|
|
|
|
locusValueFound, _, _, _, locusValueObject, err := GetLocusValueFromGenomeMap(true, genomeMap, locusRSID)
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
if (locusValueFound == false){
|
|
continue
|
|
}
|
|
|
|
genomeLocusValuesMap[locusRSID] = locusValueObject
|
|
}
|
|
|
|
anyLociTested, personDiseaseRiskScore, genomeNumberOfLociTested, genomeLociInfoMap, err := GetPersonGenomePolygenicDiseaseInfo(diseaseLociList, genomeLocusValuesMap, true)
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
if (anyLociTested == false){
|
|
continue
|
|
}
|
|
|
|
newDiseaseInfoObject := geneticAnalysis.PersonGenomePolygenicDiseaseInfo{
|
|
QuantityOfLociTested: genomeNumberOfLociTested,
|
|
RiskScore: personDiseaseRiskScore,
|
|
LociInfoMap: genomeLociInfoMap,
|
|
}
|
|
|
|
personPolygenicDiseaseInfoMap[genomeIdentifier] = newDiseaseInfoObject
|
|
}
|
|
|
|
newPersonPolygenicDiseaseInfoObject := geneticAnalysis.PersonPolygenicDiseaseInfo{
|
|
PolygenicDiseaseInfoMap: personPolygenicDiseaseInfoMap,
|
|
}
|
|
|
|
if (len(personPolygenicDiseaseInfoMap) <= 1){
|
|
// We do not need to check for conflicts, there is only <=1 genome with disease information
|
|
// Nothing left to do. Analysis is complete.
|
|
return newPersonPolygenicDiseaseInfoObject, nil
|
|
}
|
|
|
|
// We check for conflicts between the different genome's results
|
|
|
|
getConflictExistsBool := func()(bool, error){
|
|
|
|
// First we check to see if any of the genomes have different risk scores or NumberOfLociTested
|
|
|
|
genomeRiskScore := 0
|
|
genomeNumberOfLociTested := 0
|
|
|
|
firstItemReached := false
|
|
|
|
for _, personGenomeDiseaseInfoObject := range personPolygenicDiseaseInfoMap{
|
|
|
|
currentGenomeRiskScore := personGenomeDiseaseInfoObject.RiskScore
|
|
currentGenomeNumberOfLociTested := personGenomeDiseaseInfoObject.QuantityOfLociTested
|
|
|
|
if (firstItemReached == false){
|
|
genomeRiskScore = currentGenomeRiskScore
|
|
genomeNumberOfLociTested = currentGenomeNumberOfLociTested
|
|
firstItemReached = true
|
|
continue
|
|
}
|
|
|
|
if (genomeRiskScore != currentGenomeRiskScore){
|
|
return true, nil
|
|
}
|
|
if (genomeNumberOfLociTested != currentGenomeNumberOfLociTested){
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
// Now we check for conflicts between the different locus values
|
|
// We consider a conflict any time the same locus has different weights/odds ratios
|
|
// We don't care if the loci have different base pair values, so long as those base pairs have the same risk weights/odds ratios
|
|
|
|
for _, locusObject := range diseaseLociList{
|
|
|
|
locusIdentifierHex := locusObject.LocusIdentifier
|
|
|
|
locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex)
|
|
if (err != nil) { return false, err }
|
|
|
|
locusRiskWeight := 0
|
|
locusOddsRatio := float64(0)
|
|
|
|
firstItemReached := false
|
|
|
|
for _, personGenomeDiseaseInfoObject := range personPolygenicDiseaseInfoMap{
|
|
|
|
genomeLociInfoMap := personGenomeDiseaseInfoObject.LociInfoMap
|
|
|
|
genomeLocusObject, exists := genomeLociInfoMap[locusIdentifier]
|
|
if (exists == false){
|
|
if (firstItemReached == true){
|
|
// A previous genome has information for this locus, and the current one does not
|
|
return true, nil
|
|
}
|
|
continue
|
|
}
|
|
|
|
genomeLocusRiskWeight := genomeLocusObject.RiskWeight
|
|
genomeLocusOddsRatio := genomeLocusObject.OddsRatio
|
|
|
|
if (firstItemReached == false){
|
|
locusRiskWeight = genomeLocusRiskWeight
|
|
locusOddsRatio = genomeLocusOddsRatio
|
|
firstItemReached = true
|
|
continue
|
|
}
|
|
if (locusRiskWeight == genomeLocusRiskWeight && locusOddsRatio == genomeLocusOddsRatio){
|
|
// No conflict exists for this locus on the genomes we have already checked
|
|
continue
|
|
}
|
|
|
|
// Conflict exists
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
conflictExists, err := getConflictExistsBool()
|
|
if (err != nil) { return emptyDiseaseInfoObject, err }
|
|
|
|
newPersonPolygenicDiseaseInfoObject.ConflictExists = conflictExists
|
|
|
|
return newPersonPolygenicDiseaseInfoObject, nil
|
|
}
|
|
|
|
|
|
//Outputs:
|
|
// -geneticAnalysis.PersonDiscreteTraitInfo: Trait analysis object
|
|
// -error
|
|
func GetPersonDiscreteTraitAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, traitObject traits.Trait)(geneticAnalysis.PersonDiscreteTraitInfo, error){
|
|
|
|
// Map Structure: Genome Identifier -> PersonGenomeDiscreteTraitInfo
|
|
newPersonTraitInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomeDiscreteTraitInfo)
|
|
|
|
for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{
|
|
|
|
genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier
|
|
genomeMap := genomeWithMetadataObject.GenomeMap
|
|
|
|
newPersonGenomeTraitInfo := geneticAnalysis.PersonGenomeDiscreteTraitInfo{}
|
|
|
|
neuralNetworkExists, neuralNetworkOutcomeIsKnown, predictedOutcome, predictionConfidence, quantityOfLociTested, quantityOfPhasedLoci, err := GetGenomeDiscreteTraitAnalysis_NeuralNetwork(traitObject, genomeMap, true)
|
|
if (err != nil) { return geneticAnalysis.PersonDiscreteTraitInfo{}, err }
|
|
if (neuralNetworkExists == true){
|
|
|
|
newPersonGenomeTraitInfo.NeuralNetworkExists = true
|
|
|
|
if (neuralNetworkOutcomeIsKnown == true){
|
|
|
|
newPersonGenomeTraitInfo.NeuralNetworkAnalysisExists = true
|
|
|
|
newPersonGenomeTraitInfo_NeuralNetwork := geneticAnalysis.PersonGenomeDiscreteTraitInfo_NeuralNetwork{
|
|
|
|
PredictedOutcome: predictedOutcome,
|
|
PredictionConfidence: predictionConfidence,
|
|
QuantityOfLociKnown: quantityOfLociTested,
|
|
QuantityOfPhasedLoci: quantityOfPhasedLoci,
|
|
}
|
|
|
|
newPersonGenomeTraitInfo.NeuralNetworkAnalysis = newPersonGenomeTraitInfo_NeuralNetwork
|
|
}
|
|
}
|
|
|
|
anyRulesExist, quantityOfRulesTested, quantityOfLociKnown, genomePassesRulesMap, predictedOutcomeExists, predictedOutcome, err := GetGenomeDiscreteTraitAnalysis_Rules(traitObject, genomeMap, true)
|
|
if (err != nil) { return geneticAnalysis.PersonDiscreteTraitInfo{}, err }
|
|
if (anyRulesExist == true){
|
|
newPersonGenomeTraitInfo.AnyRulesExist = true
|
|
|
|
if (quantityOfRulesTested != 0){
|
|
|
|
newPersonGenomeTraitInfo.RulesAnalysisExists = true
|
|
|
|
newPersonGenomeTraitInfo_Rules := geneticAnalysis.PersonGenomeDiscreteTraitInfo_Rules{
|
|
|
|
GenomePassesRulesMap: genomePassesRulesMap,
|
|
PredictedOutcomeExists: predictedOutcomeExists,
|
|
PredictedOutcome: predictedOutcome,
|
|
QuantityOfRulesTested: quantityOfRulesTested,
|
|
QuantityOfLociKnown: quantityOfLociKnown,
|
|
}
|
|
|
|
newPersonGenomeTraitInfo.RulesAnalysis = newPersonGenomeTraitInfo_Rules
|
|
}
|
|
}
|
|
|
|
newPersonTraitInfoMap[genomeIdentifier] = newPersonGenomeTraitInfo
|
|
}
|
|
|
|
newPersonTraitInfoObject := geneticAnalysis.PersonDiscreteTraitInfo{
|
|
TraitInfoMap: newPersonTraitInfoMap,
|
|
}
|
|
|
|
if (len(newPersonTraitInfoMap) <= 1){
|
|
// We do not need to check for conflicts, there is only <=1 genome with trait information
|
|
// Nothing left to do. Analysis is complete.
|
|
return newPersonTraitInfoObject, nil
|
|
}
|
|
|
|
// We check for conflicts
|
|
|
|
getConflictExistsBool := func()(bool, error){
|
|
|
|
// We check to see if the analysis results are the same for all genomes
|
|
|
|
firstItemReached := false
|
|
personGenomeTraitInfoObject := geneticAnalysis.PersonGenomeDiscreteTraitInfo{}
|
|
|
|
for _, genomeTraitInfoObject := range newPersonTraitInfoMap{
|
|
|
|
if (firstItemReached == false){
|
|
personGenomeTraitInfoObject = genomeTraitInfoObject
|
|
continue
|
|
}
|
|
|
|
areEqual := reflect.DeepEqual(personGenomeTraitInfoObject, genomeTraitInfoObject)
|
|
if (areEqual == false){
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
conflictExists, err := getConflictExistsBool()
|
|
if (err != nil) { return geneticAnalysis.PersonDiscreteTraitInfo{}, err }
|
|
|
|
newPersonTraitInfoObject.ConflictExists = conflictExists
|
|
|
|
return newPersonTraitInfoObject, nil
|
|
}
|
|
|
|
|
|
//Outputs:
|
|
// -int: Base pair disease locus risk weight
|
|
// -bool: Base pair disease locus odds ratio known
|
|
// -float64: Base pair disease locus odds ratio
|
|
// -error
|
|
func GetGenomePolygenicDiseaseLocusRiskInfo(locusRiskWeightsMap map[string]int, locusOddsRatiosMap map[string]float64, locusBase1Value string, locusBase2Value string)(int, bool, float64, error){
|
|
|
|
locusBasePairJoined := locusBase1Value + ";" + locusBase2Value
|
|
|
|
riskWeight, exists := locusRiskWeightsMap[locusBasePairJoined]
|
|
if (exists == false){
|
|
// This is an unknown base combination
|
|
// We will treat it as a 0 risk weight
|
|
return 0, true, 1, nil
|
|
}
|
|
|
|
if (riskWeight == 0){
|
|
return 0, true, 1, nil
|
|
}
|
|
|
|
oddsRatio, exists := locusOddsRatiosMap[locusBasePairJoined]
|
|
if (exists == false){
|
|
return riskWeight, false, 0, nil
|
|
}
|
|
|
|
return riskWeight, true, oddsRatio, nil
|
|
}
|
|
|
|
// We use this to generate trait predictions using a neural network
|
|
// The alternative prediction method is to use Rules (see GetGenomeTraitAnalysis_Rules)
|
|
//Outputs:
|
|
// -bool: Trait Neural network analysis available (if false, we can't predict this trait using a neural network)
|
|
// -bool: Neural network outcome is known (at least 1 locus value is known which is needed for the neural network
|
|
// -string: The predicted outcome (Example: "Blue")
|
|
// -int: Probability (0-100) that the outcome from neural network is true (confidence)
|
|
// -int: Quantity of loci tested
|
|
// -int: Quantity of phased loci
|
|
// -error
|
|
func GetGenomeDiscreteTraitAnalysis_NeuralNetwork(traitObject traits.Trait, genomeMap map[int64]locusValue.LocusValue, checkForAliases bool)(bool, bool, string, int, int, int, error){
|
|
|
|
getGenomeLocusValuesMap := func()(map[int64]locusValue.LocusValue, error){
|
|
|
|
if (checkForAliases == false){
|
|
// We don't need to check for rsID aliases.
|
|
return genomeMap, nil
|
|
}
|
|
|
|
traitLociList := traitObject.LociList
|
|
|
|
// This map contains the locus values for the genome
|
|
// If a locus's entry doesn't exist, its value is unknown
|
|
// Map Structure: Locus rsID -> Locus Value
|
|
genomeLocusValuesMap := make(map[int64]locusValue.LocusValue)
|
|
|
|
for _, locusRSID := range traitLociList{
|
|
|
|
locusBasePairKnown, _, _, _, locusValueObject, err := GetLocusValueFromGenomeMap(checkForAliases, genomeMap, locusRSID)
|
|
if (err != nil) { return nil, err }
|
|
if (locusBasePairKnown == false){
|
|
continue
|
|
}
|
|
|
|
genomeLocusValuesMap[locusRSID] = locusValueObject
|
|
}
|
|
|
|
return genomeLocusValuesMap, nil
|
|
}
|
|
|
|
genomeLocusValuesMap, err := getGenomeLocusValuesMap()
|
|
if (err != nil) { return false, false, "", 0, 0, 0, err }
|
|
|
|
traitName := traitObject.TraitName
|
|
|
|
neuralNetworkModelExists, traitPredictionIsPossible, predictedOutcome, predictionConfidence, quantityOfLociKnown, quantityOfPhasedLoci, err := geneticPrediction.GetNeuralNetworkTraitPredictionFromGenomeMap(traitName, genomeLocusValuesMap)
|
|
if (err != nil) { return false, false, "", 0, 0, 0, err }
|
|
if (neuralNetworkModelExists == false){
|
|
return false, false, "", 0, 0, 0, nil
|
|
}
|
|
if (traitPredictionIsPossible == false){
|
|
return true, false, "", 0, 0, 0, nil
|
|
}
|
|
|
|
return true, true, predictedOutcome, predictionConfidence, quantityOfLociKnown, quantityOfPhasedLoci, nil
|
|
}
|
|
|
|
//Outputs:
|
|
// -bool: Rule-based trait prediction is available
|
|
// -int: Quantity of trait rules tested
|
|
// -int: Quantity of loci known
|
|
// -map[[3]byte]bool: Passed rules map (Rule Identifier -> Genome passes rule)
|
|
// -bool: Rule-based prediction outcome is known (at least 1 rule has been tested and there is no outcome tie)
|
|
// -string: The predicted outcome (Example: "Tolerant")
|
|
// -error
|
|
func GetGenomeDiscreteTraitAnalysis_Rules(traitObject traits.Trait, genomeMap map[int64]locusValue.LocusValue, checkForAliases bool)(bool, int, int, map[[3]byte]bool, bool, string, error){
|
|
|
|
traitIsDiscreteOrNumeric := traitObject.DiscreteOrNumeric
|
|
if (traitIsDiscreteOrNumeric != "Discrete"){
|
|
return false, 0, 0, nil, false, "", errors.New("GetGenomeDiscreteTraitAnalysis_Rules called with non-discrete trait.")
|
|
}
|
|
|
|
traitRulesList := traitObject.RulesList
|
|
|
|
if (len(traitRulesList) == 0){
|
|
// We can't predict this trait using rules
|
|
// This means that neural network prediction is the only alternative potential way to predict this trait
|
|
return false, 0, 0, nil, false, "", nil
|
|
}
|
|
|
|
// This map contains the trait outcome scores for the genome
|
|
// Map Structure: Outcome Name -> Score
|
|
// Example: "Intolerant" -> 5
|
|
traitOutcomeScoresMap := make(map[string]int)
|
|
|
|
// Map Structure: Rule Identifier -> Genome Passes rule (true if the genome passes the rule)
|
|
personPassesRulesMap := make(map[[3]byte]bool)
|
|
|
|
// This map stores each known loci
|
|
// Multiple rules can use the same loci, so we need a map to avoid duplicates
|
|
// Map Structure: Locus RSID -> Locus is known
|
|
lociAreKnownMap := make(map[int64]bool)
|
|
|
|
for _, ruleObject := range traitRulesList{
|
|
|
|
ruleIdentifierHex := ruleObject.RuleIdentifier
|
|
|
|
ruleIdentifier, err := encoding.DecodeHexStringTo3ByteArray(ruleIdentifierHex)
|
|
if (err != nil) { return false, 0, 0, nil, false, "", err }
|
|
|
|
ruleLociList := ruleObject.LociList
|
|
|
|
for _, locusObject := range ruleLociList{
|
|
|
|
locusRSID := locusObject.LocusRSID
|
|
|
|
_, exists := lociAreKnownMap[locusRSID]
|
|
if (exists == true){
|
|
// We already know if this locus is known
|
|
continue
|
|
}
|
|
locusIsKnown, _, _, _, _, err := GetLocusValueFromGenomeMap(checkForAliases, genomeMap, locusRSID)
|
|
if (err != nil) { return false, 0, 0, nil, false, "", err }
|
|
|
|
lociAreKnownMap[locusRSID] = locusIsKnown
|
|
}
|
|
|
|
genomePassesRuleIsKnown, genomePassesRule, err := GetGenomePassesDiscreteTraitRuleStatus(ruleLociList, genomeMap, checkForAliases)
|
|
if (err != nil) { return false, 0, 0, nil, false, "", err }
|
|
if (genomePassesRuleIsKnown == false){
|
|
continue
|
|
}
|
|
|
|
personPassesRulesMap[ruleIdentifier] = genomePassesRule
|
|
|
|
// The rule has been passed by this genome
|
|
// We add the outcome points for the rule to the traitOutcomeScoresMap
|
|
|
|
ruleOutcomePointsMap := ruleObject.OutcomePointsMap
|
|
|
|
for traitOutcome, pointsChange := range ruleOutcomePointsMap{
|
|
|
|
traitOutcomeScoresMap[traitOutcome] += pointsChange
|
|
}
|
|
}
|
|
|
|
quantityOfLociKnown := 0
|
|
|
|
for _, locusIsKnown := range lociAreKnownMap{
|
|
if (locusIsKnown == true){
|
|
quantityOfLociKnown += 1
|
|
}
|
|
}
|
|
|
|
quantityOfRulesTested := len(personPassesRulesMap)
|
|
|
|
if (quantityOfRulesTested == 0){
|
|
return true, 0, quantityOfLociKnown, personPassesRulesMap, false, "", nil
|
|
}
|
|
|
|
// -bool: Outcome is known (will be false if there is a tie
|
|
// -string:
|
|
// -error
|
|
getOutcome := func()(bool, string, error){
|
|
|
|
largestOutcome := ""
|
|
largestOutcomePoints := 0
|
|
tieExists := false
|
|
|
|
for outcomeName, outcomePoints := range traitOutcomeScoresMap{
|
|
|
|
if (outcomePoints < 1){
|
|
// This should never happen, because outcomes points should only be increased by integers which are at least 1 or greater
|
|
return false, "", errors.New("traitOutcomeScoresMap contains outcomePoints < 1.")
|
|
}
|
|
|
|
if (outcomePoints > largestOutcomePoints){
|
|
largestOutcome = outcomeName
|
|
largestOutcomePoints = outcomePoints
|
|
tieExists = false
|
|
continue
|
|
|
|
} else if (outcomePoints == largestOutcomePoints){
|
|
tieExists = true
|
|
}
|
|
}
|
|
|
|
if (tieExists == true){
|
|
return false, "", nil
|
|
}
|
|
|
|
return true, largestOutcome, nil
|
|
}
|
|
|
|
outcomeIsKnown, outcomeName, err := getOutcome()
|
|
if (err != nil) { return false, 0, 0, nil, false, "", err }
|
|
if (outcomeIsKnown == false){
|
|
return true, quantityOfRulesTested, quantityOfLociKnown, personPassesRulesMap, false, "", nil
|
|
}
|
|
|
|
return true, quantityOfRulesTested, quantityOfLociKnown, personPassesRulesMap, true, outcomeName, nil
|
|
}
|
|
|
|
// This function checks to see if a genome will pass a trait rule
|
|
// Outputs:
|
|
// -bool: Genome passes trait rule status is known
|
|
// -bool: Genome passes trait rule
|
|
// -error
|
|
func GetGenomePassesDiscreteTraitRuleStatus(ruleLociList []traits.RuleLocus, genomeMap map[int64]locusValue.LocusValue, checkForAliases bool)(bool, bool, error){
|
|
|
|
// We check to see if genome passes all rule loci
|
|
// To pass a rule, all of the rule's loci must be passed by the provided genome
|
|
// We consider a rule Known if the genome either passes all loci, or fails to pass 1 locus
|
|
// We consider a rule Unknown if any loci are unknown, and there are no rules which are known not to be passed
|
|
|
|
anyLocusIsUnknown := false
|
|
|
|
for _, locusObject := range ruleLociList{
|
|
|
|
locusRSID := locusObject.LocusRSID
|
|
|
|
locusBasePairKnown, locusBase1, locusBase2, _, _, err := GetLocusValueFromGenomeMap(checkForAliases, genomeMap, locusRSID)
|
|
if (err != nil) { return false, false, err }
|
|
if (locusBasePairKnown == false){
|
|
anyLocusIsUnknown = true
|
|
// We keep searching to see if any of the rule's loci are known to not pass
|
|
continue
|
|
}
|
|
|
|
locusBasePairJoined := locusBase1 + ";" + locusBase2
|
|
|
|
locusBasePairsList := locusObject.BasePairsList
|
|
|
|
genomePassesRuleLocus := slices.Contains(locusBasePairsList, locusBasePairJoined)
|
|
if (genomePassesRuleLocus == false){
|
|
// The genome has failed to pass a single rule locus, thus, the rule is not passed
|
|
return true, false, nil
|
|
}
|
|
}
|
|
|
|
if (anyLocusIsUnknown == true){
|
|
// The rule is not passed, but it's status is unknown
|
|
// There were no rules which were known not to pass
|
|
return false, false, nil
|
|
}
|
|
|
|
// All rules were passed
|
|
|
|
return true, true, nil
|
|
}
|
|
|
|
|
|
// This function will retrieve the base pair of the locus from the input genome map
|
|
// We use this function because each rsID has aliases, so we must sometimes check those aliases to find locus values
|
|
//
|
|
// Outputs:
|
|
// -bool: Valid base pair value found
|
|
// -string: Base 1 Value (Nucleotide base for the SNP)
|
|
// -string: Base 2 Value (Nucleotide base for the SNP)
|
|
// -bool: Locus base pair is phased
|
|
// -locusValue.LocusValue
|
|
// -error
|
|
func GetLocusValueFromGenomeMap(checkForAliases bool, inputGenomeMap map[int64]locusValue.LocusValue, locusRSID int64)(bool, string, string, bool, locusValue.LocusValue, error){
|
|
|
|
// Outputs:
|
|
// -bool: Locus value found
|
|
// -locusValue.LocusValue
|
|
// -error
|
|
getLocusValue := func()(bool, locusValue.LocusValue, error){
|
|
|
|
currentLocusValue, exists := inputGenomeMap[locusRSID]
|
|
if (exists == true){
|
|
return true, currentLocusValue, nil
|
|
}
|
|
|
|
if (checkForAliases == false){
|
|
return false, locusValue.LocusValue{}, nil
|
|
}
|
|
|
|
// We check for aliases
|
|
|
|
anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(locusRSID)
|
|
if (err != nil) { return false, locusValue.LocusValue{}, err }
|
|
if (anyAliasesExist == false){
|
|
return false, locusValue.LocusValue{}, nil
|
|
}
|
|
|
|
for _, rsidAlias := range rsidAliasesList{
|
|
|
|
currentLocusValue, exists := inputGenomeMap[rsidAlias]
|
|
if (exists == true){
|
|
return true, currentLocusValue, nil
|
|
}
|
|
}
|
|
|
|
return false, locusValue.LocusValue{}, nil
|
|
}
|
|
|
|
locusValueFound, locusValueObject, err := getLocusValue()
|
|
if (err != nil) { return false, "", "", false, locusValue.LocusValue{}, err }
|
|
if (locusValueFound == false){
|
|
return false, "", "", false, locusValue.LocusValue{}, nil
|
|
}
|
|
|
|
base1Value := locusValueObject.Base1Value
|
|
base2Value := locusValueObject.Base2Value
|
|
locusIsPhased := locusValueObject.LocusIsPhased
|
|
|
|
return true, base1Value, base2Value, locusIsPhased, locusValueObject, nil
|
|
}
|
|
|
|
|