seekia/internal/genetics/createGeneticAnalysis/createGeneticAnalysis.go

2545 lines
94 KiB
Go

// createGeneticAnalysis provides functions to create a genetic analysis
// These are performed on one or more genome files.
// They produce 3 kinds of results: Monogenic Diseases, Polygenic Diseases and Traits
// They can be performed on a Person or a Couple
// Couple analyses provide an analysis of the prospective offspring of the couple
package createGeneticAnalysis
// Disclaimer: I am a novice in the ways of genetics. This package could be flawed in numerous ways.
// TODO: We want to eventually use neural nets for both trait and polygenic disease analysis (see geneticPrediction.go)
// These will be trained on a set of genomes and will output a probability analysis for each trait/disease
// This is only possible once we get access to the necessary training data
//
// This is how offspring trait prediction could work with the neural net model:
// Both users will share all relevant SNPS base pairs that determine the trait on their profile.
// Each location has 4 possible outcomes, so for 1000 SNPs, there are 4^1000 possible offspring outcomes for a given couple. (this
// is actually too high because recombination break points do not occur at each locus, see genetic linkage)
// This is too many options for us to check all of them.
// Seekia will create 100 offspring that would be produced from both users, and run each offspring through the neural net.
// Each offspring would be different. The allele from each parent for each SNP would be randomly chosen.
// The user can choose how many prospective offspring to create in the settings.
// More offspring will take longer, but will yield a more accurate trait probability.
// Seekia will show the the average trait result and a chart showing the trait results for all created offspring.
// TODO: Add the ability to weight different genome files based on their reliability.
// Some files are much more accurate because they record each location many times.
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/resources/geneticReferences/monogenicDiseases"
import "seekia/resources/geneticReferences/polygenicDiseases"
import "seekia/resources/geneticReferences/traits"
import "seekia/internal/encoding"
import "seekia/internal/genetics/geneticAnalysis"
import "seekia/internal/genetics/locusValue"
import "seekia/internal/genetics/prepareRawGenomes"
import "seekia/internal/helpers"
import "errors"
import mathRand "math/rand/v2"
import "strings"
import "slices"
import "maps"
func verifyBasePair(inputBasePair string)bool{
base1, base2, delimiterFound := strings.Cut(inputBasePair, ";")
if (delimiterFound == false){
return false
}
// I = Insertion
// D = Deletion
validBasesList := []string{"C", "A", "T", "G", "I", "D"}
baseIsValid := slices.Contains(validBasesList, base1)
if (baseIsValid == false){
return false
}
baseIsValid = slices.Contains(validBasesList, base2)
if (baseIsValid == false){
return false
}
return true
}
//Outputs:
// -bool: Process completed (it was not stopped manually before completion)
// -string: New Genetic analysis string (Encoded in MessagePack)
// -error
func CreatePersonGeneticAnalysis(genomesList []prepareRawGenomes.RawGenomeWithMetadata, updatePercentageCompleteFunction func(int)error, checkIfProcessIsStopped func()bool)(bool, string, error){
prepareRawGenomesUpdatePercentageCompleteFunction := func(newPercentage int)error{
newPercentageCompletion, err := helpers.ScaleNumberProportionally(true, newPercentage, 0, 100, 0, 50)
if (err != nil){ return err }
err = updatePercentageCompleteFunction(newPercentageCompletion)
if (err != nil) { return err }
return nil
}
genomesWithMetadataList, allRawGenomeIdentifiersList, multipleGenomesExist, onlyExcludeConflictsGenomeIdentifier, onlyIncludeSharedGenomeIdentifier, err := prepareRawGenomes.GetGenomesWithMetadataListFromRawGenomesList(genomesList, prepareRawGenomesUpdatePercentageCompleteFunction)
if (err != nil) { return false, "", err }
newGeneticAnalysisObject := geneticAnalysis.PersonAnalysis{
AnalysisVersion: 1,
CombinedGenomesExist: multipleGenomesExist,
AllRawGenomeIdentifiersList: allRawGenomeIdentifiersList,
}
if (multipleGenomesExist == true){
newGeneticAnalysisObject.OnlyExcludeConflictsGenomeIdentifier = onlyExcludeConflictsGenomeIdentifier
newGeneticAnalysisObject.OnlyIncludeSharedGenomeIdentifier = onlyIncludeSharedGenomeIdentifier
}
processIsStopped := checkIfProcessIsStopped()
if (processIsStopped == true){
return false, "", nil
}
monogenicDiseasesList, err := monogenicDiseases.GetMonogenicDiseaseObjectsList()
if (err != nil) { return false, "", err }
// Map Structure: Disease Name -> PersonMonogenicDiseaseInfo
analysisMonogenicDiseasesMap := make(map[string]geneticAnalysis.PersonMonogenicDiseaseInfo)
for _, monogenicDiseaseObject := range monogenicDiseasesList{
diseaseName := monogenicDiseaseObject.DiseaseName
personDiseaseAnalysisObject, err := getPersonMonogenicDiseaseAnalysis(genomesWithMetadataList, monogenicDiseaseObject)
if (err != nil) { return false, "", err }
analysisMonogenicDiseasesMap[diseaseName] = personDiseaseAnalysisObject
}
newGeneticAnalysisObject.MonogenicDiseasesMap = analysisMonogenicDiseasesMap
polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList()
if (err != nil) { return false, "", err }
// Map Structure: Disease Name -> PersonPolygenicDiseaseInfo
analysisPolygenicDiseasesMap := make(map[string]geneticAnalysis.PersonPolygenicDiseaseInfo)
for _, diseaseObject := range polygenicDiseaseObjectsList{
personDiseaseAnalysisObject, err := getPersonPolygenicDiseaseAnalysis(genomesWithMetadataList, diseaseObject)
if (err != nil) { return false, "", err }
diseaseName := diseaseObject.DiseaseName
analysisPolygenicDiseasesMap[diseaseName] = personDiseaseAnalysisObject
}
newGeneticAnalysisObject.PolygenicDiseasesMap = analysisPolygenicDiseasesMap
traitObjectsList, err := traits.GetTraitObjectsList()
if (err != nil) { return false, "", err }
// Map Structure: Trait Name -> PersonTraitInfo
analysisTraitsMap := make(map[string]geneticAnalysis.PersonTraitInfo)
for _, traitObject := range traitObjectsList{
personTraitAnalysisObject, err := getPersonTraitAnalysis(genomesWithMetadataList, traitObject)
if (err != nil) { return false, "", err }
traitName := traitObject.TraitName
analysisTraitsMap[traitName] = personTraitAnalysisObject
}
newGeneticAnalysisObject.TraitsMap = analysisTraitsMap
analysisBytes, err := encoding.EncodeMessagePackBytes(newGeneticAnalysisObject)
if (err != nil) { return false, "", err }
analysisString := string(analysisBytes)
return true, analysisString, nil
}
//Outputs:
// -bool: Process completed (was not manually stopped mid-way)
// -string: Couple genetic analysis string (encoded in MessagePack)
// -error
func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenomeWithMetadata, person2GenomesList []prepareRawGenomes.RawGenomeWithMetadata, updatePercentageCompleteFunction func(int)error, checkIfProcessIsStopped func()bool)(bool, string, error){
person1PrepareRawGenomesUpdatePercentageCompleteFunction := func(newPercentage int)error{
newPercentageCompletion, err := helpers.ScaleNumberProportionally(true, newPercentage, 0, 100, 0, 25)
if (err != nil){ return err }
err = updatePercentageCompleteFunction(newPercentageCompletion)
if (err != nil) { return err }
return nil
}
person1GenomesWithMetadataList, allPerson1RawGenomeIdentifiersList, person1HasMultipleGenomes, person1OnlyExcludeConflictsGenomeIdentifier, person1OnlyIncludeSharedGenomeIdentifier, err := prepareRawGenomes.GetGenomesWithMetadataListFromRawGenomesList(person1GenomesList, person1PrepareRawGenomesUpdatePercentageCompleteFunction)
if (err != nil) { return false, "", err }
processIsStopped := checkIfProcessIsStopped()
if (processIsStopped == true){
return false, "", nil
}
person2PrepareRawGenomesUpdatePercentageCompleteFunction := func(newPercentage int)error{
newPercentageCompletion, err := helpers.ScaleNumberProportionally(true, newPercentage, 0, 100, 25, 50)
if (err != nil){ return err }
err = updatePercentageCompleteFunction(newPercentageCompletion)
if (err != nil) { return err }
return nil
}
person2GenomesWithMetadataList, allPerson2RawGenomeIdentifiersList, person2HasMultipleGenomes, person2OnlyExcludeConflictsGenomeIdentifier, person2OnlyIncludeSharedGenomeIdentifier, err := prepareRawGenomes.GetGenomesWithMetadataListFromRawGenomesList(person2GenomesList, person2PrepareRawGenomesUpdatePercentageCompleteFunction)
if (err != nil) { return false, "", err }
processIsStopped = checkIfProcessIsStopped()
if (processIsStopped == true){
return false, "", nil
}
// The analysis will analyze either 1 or 2 genome pairs
// The gui will display the results from each pair
//Outputs:
// -[16]byte: Pair 1 Person 1 Genome Identifier
// -[16]byte: Pair 1 Person 2 Genome Identifier
// -bool: Second pair exists
// -[16]byte: Pair 2 Person 1 Genome Identifier
// -[16]byte: Pair 2 Person 2 Genome Identifier
// -error
getGenomePairsToAnalyze := func()([16]byte, [16]byte, bool, [16]byte, [16]byte, error){
if (person1HasMultipleGenomes == true && person2HasMultipleGenomes == true){
return person1OnlyExcludeConflictsGenomeIdentifier, person2OnlyExcludeConflictsGenomeIdentifier, true, person1OnlyIncludeSharedGenomeIdentifier, person2OnlyIncludeSharedGenomeIdentifier, nil
}
if (person1HasMultipleGenomes == true && person2HasMultipleGenomes == false){
person2GenomeIdentifier := allPerson2RawGenomeIdentifiersList[0]
return person1OnlyExcludeConflictsGenomeIdentifier, person2GenomeIdentifier, true, person1OnlyIncludeSharedGenomeIdentifier, person2GenomeIdentifier, nil
}
if (person1HasMultipleGenomes == false && person2HasMultipleGenomes == true){
person1GenomeIdentifier := allPerson1RawGenomeIdentifiersList[0]
return person1GenomeIdentifier, person2OnlyExcludeConflictsGenomeIdentifier, true, person1GenomeIdentifier, person2OnlyIncludeSharedGenomeIdentifier, nil
}
//person1HasMultipleGenomes == false && person2HasMultipleGenomes == false
person1GenomeIdentifier := allPerson1RawGenomeIdentifiersList[0]
person2GenomeIdentifier := allPerson2RawGenomeIdentifiersList[0]
return person1GenomeIdentifier, person2GenomeIdentifier, false, [16]byte{}, [16]byte{}, nil
}
pair1Person1GenomeIdentifier, pair1Person2GenomeIdentifier, genomePair2Exists, pair2Person1GenomeIdentifier, pair2Person2GenomeIdentifier, err := getGenomePairsToAnalyze()
if (err != nil){ return false, "", err }
newCoupleAnalysis := geneticAnalysis.CoupleAnalysis{
AnalysisVersion: 1,
Pair1Person1GenomeIdentifier: pair1Person1GenomeIdentifier,
Pair1Person2GenomeIdentifier: pair1Person2GenomeIdentifier,
SecondPairExists: genomePair2Exists,
Person1HasMultipleGenomes: person1HasMultipleGenomes,
Person2HasMultipleGenomes: person2HasMultipleGenomes,
}
if (genomePair2Exists == true){
// At least 1 of the people have multiple genomes
newCoupleAnalysis.Pair2Person1GenomeIdentifier = pair2Person1GenomeIdentifier
newCoupleAnalysis.Pair2Person2GenomeIdentifier = pair2Person2GenomeIdentifier
if (person1HasMultipleGenomes == true){
newCoupleAnalysis.Person1OnlyExcludeConflictsGenomeIdentifier = person1OnlyExcludeConflictsGenomeIdentifier
newCoupleAnalysis.Person1OnlyIncludeSharedGenomeIdentifier = person1OnlyIncludeSharedGenomeIdentifier
}
if (person2HasMultipleGenomes == true){
newCoupleAnalysis.Person2OnlyExcludeConflictsGenomeIdentifier = person2OnlyExcludeConflictsGenomeIdentifier
newCoupleAnalysis.Person2OnlyIncludeSharedGenomeIdentifier = person2OnlyIncludeSharedGenomeIdentifier
}
}
// We compute and add monogenic disease probabilities
monogenicDiseasesList, err := monogenicDiseases.GetMonogenicDiseaseObjectsList()
if (err != nil) { return false, "", err }
// Map Structure: Disease Name -> OffspringMonogenicDiseaseInfo
offspringMonogenicDiseasesMap := make(map[string]geneticAnalysis.OffspringMonogenicDiseaseInfo)
for _, diseaseObject := range monogenicDiseasesList{
diseaseName := diseaseObject.DiseaseName
variantsList := diseaseObject.VariantsList
diseaseIsDominantOrRecessive := diseaseObject.DominantOrRecessive
person1DiseaseAnalysisObject, err := getPersonMonogenicDiseaseAnalysis(person1GenomesWithMetadataList, diseaseObject)
if (err != nil) { return false, "", err }
person2DiseaseAnalysisObject, err := getPersonMonogenicDiseaseAnalysis(person2GenomesWithMetadataList, diseaseObject)
if (err != nil) { return false, "", err }
// This map stores the number of variants tested in each person's genome
// Map Structure: Genome Identifier -> Number of variants tested
numberOfVariantsTestedMap := make(map[[16]byte]int)
// This map stores the offspring disease probabilities for each genome pair.
// A genome pair is a concatenation of two genome identifiers
// If a map entry doesn't exist, the probabilities are unknown for that genome pair
// Map Structure: Genome Pair Identifier -> OffspringMonogenicDiseaseProbabilities
offspringMonogenicDiseaseInfoMap := make(map[[32]byte]geneticAnalysis.OffspringGenomePairMonogenicDiseaseInfo)
// This will calculate the probability of monogenic disease for the offspring from the two specified genomes
// It also calculates the probabilities for each monogenic disease variant for the offspring
// It then adds the genome pair disease information to the offspringMonogenicDiseaseInfoMap and numberOfVariantsTestedMap
addGenomePairInfoToDiseaseMaps := func(person1GenomeIdentifier [16]byte, person2GenomeIdentifier [16]byte)error{
//Outputs:
// -bool: Probability is known
// -int: Probability of passing a disease variant (value between 0 and 100)
// -int: Number of variants tested
getPersonWillPassDiseaseVariantProbability := func(personDiseaseAnalysisObject geneticAnalysis.PersonMonogenicDiseaseInfo, genomeIdentifier [16]byte)(bool, int, int){
personDiseaseInfoMap := personDiseaseAnalysisObject.MonogenicDiseaseInfoMap
personGenomeDiseaseInfoObject, exists := personDiseaseInfoMap[genomeIdentifier]
if (exists == false){
return false, 0, 0
}
personGenomeProbabilityOfPassingADiseaseVariant := personGenomeDiseaseInfoObject.ProbabilityOfPassingADiseaseVariant
personGenomeNumberOfVariantsTested := personGenomeDiseaseInfoObject.NumberOfVariantsTested
return true, personGenomeProbabilityOfPassingADiseaseVariant, personGenomeNumberOfVariantsTested
}
person1ProbabilityIsKnown, person1WillPassVariantProbability, person1NumberOfVariantsTested := getPersonWillPassDiseaseVariantProbability(person1DiseaseAnalysisObject, person1GenomeIdentifier)
person2ProbabilityIsKnown, person2WillPassVariantProbability, person2NumberOfVariantsTested := getPersonWillPassDiseaseVariantProbability(person2DiseaseAnalysisObject, person2GenomeIdentifier)
offspringHasDiseaseProbabilityIsKnown, percentageProbabilityOffspringHasDisease, offspringHasAVariantProbabilityIsKnown, percentageProbabilityOffspringHasVariant, err := GetOffspringMonogenicDiseaseProbabilities(diseaseIsDominantOrRecessive, person1ProbabilityIsKnown, person1WillPassVariantProbability, person2ProbabilityIsKnown, person2WillPassVariantProbability)
if (err != nil) { return err }
// Now we calculate the probabilities for individual variants
offspringVariantsInfoMap := make(map[[3]byte]geneticAnalysis.OffspringMonogenicDiseaseVariantInfo)
for _, variantObject := range variantsList{
variantIdentifierHex := variantObject.VariantIdentifier
variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex)
if (err != nil) { return err }
//Outputs:
// -bool: Probability is known
// -float64: Probability that person will pass variant to offspring (between 0 and 1)
// -error
getPersonWillPassVariantProbability := func(personDiseaseAnalysisObject geneticAnalysis.PersonMonogenicDiseaseInfo, personGenomeIdentifier [16]byte)(bool, float64, error){
personDiseaseInfoMap := personDiseaseAnalysisObject.MonogenicDiseaseInfoMap
personGenomeDiseaseInfoObject, exists := personDiseaseInfoMap[personGenomeIdentifier]
if (exists == false){
return false, 0, nil
}
personGenomeDiseaseVariantsMap := personGenomeDiseaseInfoObject.VariantsInfoMap
personVariantInfoObject, exists := personGenomeDiseaseVariantsMap[variantIdentifier]
if (exists == false){
// The genome does not have information about this variant
return false, 0, nil
}
base1HasVariant := personVariantInfoObject.Base1HasVariant
base2HasVariant := personVariantInfoObject.Base2HasVariant
if (base1HasVariant == true && base2HasVariant == true){
return true, 1, nil
}
if (base1HasVariant == true && base2HasVariant == false){
return true, 0.5, nil
}
if (base1HasVariant == false && base2HasVariant == true){
return true, 0.5, nil
}
// Neither base has a variant
return true, 0, nil
}
person1VariantProbabilityIsKnown, person1WillPassVariantProbability, err := getPersonWillPassVariantProbability(person1DiseaseAnalysisObject, person1GenomeIdentifier)
if (err != nil) { return err }
person2VariantProbabilityIsKnown, person2WillPassVariantProbability, err := getPersonWillPassVariantProbability(person2DiseaseAnalysisObject, person2GenomeIdentifier)
if (err != nil) { return err }
if (person1VariantProbabilityIsKnown == false && person2VariantProbabilityIsKnown == false){
continue
}
// Outputs:
// -float64: Best Case Person 1 will pass variant probability (0-1)
// -float64: Worst Case Person 1 will pass variant probability (0-1)
// -float64: Best Case Person 2 will pass variant probability (0-1)
// -float64: Worst Case Person 2 will pass variant probability (0-1)
getBestAndWorstCaseProbabilities := func()(float64, float64, float64, float64){
if (person1VariantProbabilityIsKnown == true && person2VariantProbabilityIsKnown == false){
return person1WillPassVariantProbability, person1WillPassVariantProbability, float64(0), float64(1)
}
if (person1VariantProbabilityIsKnown == false && person2VariantProbabilityIsKnown == true){
return float64(0), float64(1), person2WillPassVariantProbability, person2WillPassVariantProbability
}
// Both people's probabilities are known
return person1WillPassVariantProbability, person1WillPassVariantProbability, person2WillPassVariantProbability, person2WillPassVariantProbability
}
bestCasePerson1WillPassVariantProbability, worstCasePerson1WillPassVariantProbability, bestCasePerson2WillPassVariantProbability, worstCasePerson2WillPassVariantProbability := getBestAndWorstCaseProbabilities()
//Outputs:
// -int: Percentage Probability of 0 mutations
// -int: Percentage Probability of 1 mutation
// -int: Percentage Probability of 2 mutations
// -error
getOffspringVariantMutationProbabilities := func(person1WillPassVariantProbability float64, person2WillPassVariantProbability float64)(int, int, int, error){
// This is the probability that neither person will pass the variant
// P = P(!A) * P(!B)
probabilityOf0Mutations := (1 - person1WillPassVariantProbability) * (1 - person2WillPassVariantProbability)
// This is the probability that either person will pass the variant, but not both
// P(A XOR B) = P(A) + P(B) - (2 * P(A and B))
probabilityOf1Mutation := person1WillPassVariantProbability + person2WillPassVariantProbability - (2 * person1WillPassVariantProbability * person2WillPassVariantProbability)
// This is the probability that both people will pass the variant
// P(A and B) = P(A) * P(B)
probabilityOf2Mutations := person1WillPassVariantProbability * person2WillPassVariantProbability
percentageProbabilityOf0Mutations, err := helpers.FloorFloat64ToInt(probabilityOf0Mutations * 100)
if (err != nil) { return 0, 0, 0, err }
percentageProbabilityOf1Mutation, err := helpers.FloorFloat64ToInt(probabilityOf1Mutation * 100)
if (err != nil) { return 0, 0, 0, err }
percentageProbabilityOf2Mutations, err := helpers.FloorFloat64ToInt(probabilityOf2Mutations * 100)
if (err != nil) { return 0, 0, 0, err }
return percentageProbabilityOf0Mutations, percentageProbabilityOf1Mutation, percentageProbabilityOf2Mutations, nil
}
bestCase0MutationsProbability, bestCase1MutationProbability, bestCase2MutationsProbability, err := getOffspringVariantMutationProbabilities(bestCasePerson1WillPassVariantProbability, bestCasePerson2WillPassVariantProbability)
if (err != nil) { return err }
worstCase0MutationsProbability, worstCase1MutationProbability, worstCase2MutationsProbability, err := getOffspringVariantMutationProbabilities(worstCasePerson1WillPassVariantProbability, worstCasePerson2WillPassVariantProbability)
if (err != nil) { return err }
// We have to figure out which 1-mutation-probability is lower
// The best case probabilities can actually result in a higher probability for 1 mutation
// than the worst case probabilities
// This is because in a worst-case-scenario, the probability of 1 mutation might be 0 because
// the probability of 2 mutations is 100
// This is not needed for the 0 and 2 mutation probabilities because TODO
lowerBound1MutationProbability := min(bestCase1MutationProbability, worstCase1MutationProbability)
upperBound1MutationProbability := max(bestCase1MutationProbability, worstCase1MutationProbability)
newOffspringMonogenicDiseaseVariantInfoObject := geneticAnalysis.OffspringMonogenicDiseaseVariantInfo{
ProbabilityOf0MutationsLowerBound: worstCase0MutationsProbability,
ProbabilityOf0MutationsUpperBound: bestCase0MutationsProbability,
ProbabilityOf1MutationLowerBound: lowerBound1MutationProbability,
ProbabilityOf1MutationUpperBound: upperBound1MutationProbability,
ProbabilityOf2MutationsLowerBound: bestCase2MutationsProbability,
ProbabilityOf2MutationsUpperBound: worstCase2MutationsProbability,
}
offspringVariantsInfoMap[variantIdentifier] = newOffspringMonogenicDiseaseVariantInfoObject
}
if (person1ProbabilityIsKnown == false && person2ProbabilityIsKnown == false && len(offspringVariantsInfoMap) == 0){
// We have no information about this genome pair's disease risk
// We don't add this genome pair's disease info to the diseaseInfoMap
return nil
}
numberOfVariantsTestedMap[person1GenomeIdentifier] = person1NumberOfVariantsTested
numberOfVariantsTestedMap[person2GenomeIdentifier] = person2NumberOfVariantsTested
newOffspringGenomePairMonogenicDiseaseInfoObject := geneticAnalysis.OffspringGenomePairMonogenicDiseaseInfo{
ProbabilityOffspringHasDiseaseIsKnown: offspringHasDiseaseProbabilityIsKnown,
ProbabilityOffspringHasDisease: percentageProbabilityOffspringHasDisease,
ProbabilityOffspringHasVariantIsKnown: offspringHasAVariantProbabilityIsKnown,
ProbabilityOffspringHasVariant: percentageProbabilityOffspringHasVariant,
VariantsInfoMap: offspringVariantsInfoMap,
}
genomePairIdentifier := helpers.JoinTwo16ByteArrays(person1GenomeIdentifier, person2GenomeIdentifier)
offspringMonogenicDiseaseInfoMap[genomePairIdentifier] = newOffspringGenomePairMonogenicDiseaseInfoObject
return nil
}
err = addGenomePairInfoToDiseaseMaps(pair1Person1GenomeIdentifier, pair1Person2GenomeIdentifier)
if (err != nil) { return false, "", err }
if (genomePair2Exists == true){
err := addGenomePairInfoToDiseaseMaps(pair2Person1GenomeIdentifier, pair2Person2GenomeIdentifier)
if (err != nil) { return false, "", err }
}
newOffspringMonogenicDiseaseInfoObject := geneticAnalysis.OffspringMonogenicDiseaseInfo{
NumberOfVariantsTestedMap: numberOfVariantsTestedMap,
MonogenicDiseaseInfoMap: offspringMonogenicDiseaseInfoMap,
}
// Now we check for conflicts in monogenic disease results
// For couples, a conflict is when either genome pair has differing results for any disease probability/variant probability
// This is different from a person having conflicts within their different genomes
// Each genome pair only uses 1 genome from each person.
if (len(offspringMonogenicDiseaseInfoMap) >= 2){
// Conflicts are only possible if two genome pairs with information about this disease exist
checkIfConflictExists := func()(bool, error){
probabilityOffspringHasDiseaseIsKnown := false
probabilityOffspringHasDisease := 0
probabilityOffspringHasVariantIsKnown := false
probabilityOffspringHasVariant := 0
offspringVariantsInfoMap := make(map[[3]byte]geneticAnalysis.OffspringMonogenicDiseaseVariantInfo)
firstItemReached := false
for _, offspringMonogenicDiseaseInfoObject := range offspringMonogenicDiseaseInfoMap{
currentProbabilityOffspringHasDiseaseIsKnown := offspringMonogenicDiseaseInfoObject.ProbabilityOffspringHasDiseaseIsKnown
currentProbabilityOffspringHasDisease := offspringMonogenicDiseaseInfoObject.ProbabilityOffspringHasDisease
currentProbabilityOffspringHasVariantIsKnown := offspringMonogenicDiseaseInfoObject.ProbabilityOffspringHasVariantIsKnown
currentProbabilityOffspringHasVariant := offspringMonogenicDiseaseInfoObject.ProbabilityOffspringHasVariant
currentOffspringVariantsInfoMap := offspringMonogenicDiseaseInfoObject.VariantsInfoMap
if (firstItemReached == false){
probabilityOffspringHasDiseaseIsKnown = currentProbabilityOffspringHasDiseaseIsKnown
probabilityOffspringHasDisease = currentProbabilityOffspringHasDisease
probabilityOffspringHasVariantIsKnown = currentProbabilityOffspringHasVariantIsKnown
probabilityOffspringHasVariant = currentProbabilityOffspringHasVariant
offspringVariantsInfoMap = currentOffspringVariantsInfoMap
firstItemReached = true
continue
}
if (currentProbabilityOffspringHasDiseaseIsKnown != probabilityOffspringHasDiseaseIsKnown){
return true, nil
}
if (currentProbabilityOffspringHasDisease != probabilityOffspringHasDisease){
return true, nil
}
if (currentProbabilityOffspringHasVariantIsKnown != probabilityOffspringHasVariantIsKnown){
return true, nil
}
if (currentProbabilityOffspringHasVariant != probabilityOffspringHasVariant){
return true, nil
}
areEqual := maps.Equal(currentOffspringVariantsInfoMap, offspringVariantsInfoMap)
if (areEqual == false){
return true, nil
}
}
return false, nil
}
conflictExists, err := checkIfConflictExists()
if (err != nil) { return false, "", err }
newOffspringMonogenicDiseaseInfoObject.ConflictExists = conflictExists
}
offspringMonogenicDiseasesMap[diseaseName] = newOffspringMonogenicDiseaseInfoObject
}
newCoupleAnalysis.MonogenicDiseasesMap = offspringMonogenicDiseasesMap
// Step 2: Polygenic Diseases
polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList()
if (err != nil){ return false, "", err }
// Map Structure: Disease Name -> OffspringPolygenicDiseaseInfo
offspringPolygenicDiseasesMap := make(map[string]geneticAnalysis.OffspringPolygenicDiseaseInfo)
for _, diseaseObject := range polygenicDiseaseObjectsList{
diseaseName := diseaseObject.DiseaseName
diseaseLociList := diseaseObject.LociList
person1DiseaseAnalysisObject, err := getPersonPolygenicDiseaseAnalysis(person1GenomesWithMetadataList, diseaseObject)
if (err != nil) { return false, "", err }
person2DiseaseAnalysisObject, err := getPersonPolygenicDiseaseAnalysis(person2GenomesWithMetadataList, diseaseObject)
if (err != nil) { return false, "", err }
// This map stores the polygenic disease info for each genome pair
// Map Structure: Genome Pair Identifier -> OffspringGenomePairPolygenicDiseaseInfo
offspringPolygenicDiseaseInfoMap := make(map[[32]byte]geneticAnalysis.OffspringGenomePairPolygenicDiseaseInfo)
// This will calculate the disease info for the offspring from the two specified genomes to the diseases map
// It then adds the pair entry to the offspringPolygenicDiseaseInfoMap
addGenomePairDiseaseInfoToDiseaseMap := func(person1GenomeIdentifier [16]byte, person2GenomeIdentifier [16]byte)error{
summedRiskWeights := 0
minimumPossibleRiskWeightSum := 0
maximumPossibleRiskWeightSum := 0
// Map Structure: Locus Identifier -> OffspringPolygenicDiseaseLocusInfo
offspringDiseaseLociInfoMap := make(map[[3]byte]geneticAnalysis.OffspringPolygenicDiseaseLocusInfo)
for _, locusObject := range diseaseLociList{
locusIdentifierHex := locusObject.LocusIdentifier
locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex)
if (err != nil) { return err }
locusRiskWeightsMap := locusObject.RiskWeightsMap
locusOddsRatiosMap := locusObject.OddsRatiosMap
//Outputs:
// -bool: Locus value exists
// -string: Base1 Value
// -string: Base2 Value
// -error
getPersonGenomeLocusValues := func(personGenomeIdentifier [16]byte, personDiseaseAnalysisObject geneticAnalysis.PersonPolygenicDiseaseInfo)(bool, string, string, error){
personPolygenicDiseaseInfoMap := personDiseaseAnalysisObject.PolygenicDiseaseInfoMap
personGenomeDiseaseInfoObject, exists := personPolygenicDiseaseInfoMap[personGenomeIdentifier]
if (exists == false){
// This person's genome has no information about loci related to this disease
return false, "", "", nil
}
personGenomeLociMap := personGenomeDiseaseInfoObject.LociInfoMap
personGenomeLocusInfoObject, exists := personGenomeLociMap[locusIdentifier]
if (exists == false){
// This person's genome doesn't have information about this locus
return false, "", "", nil
}
locusBase1 := personGenomeLocusInfoObject.LocusBase1
locusBase2 := personGenomeLocusInfoObject.LocusBase2
return true, locusBase1, locusBase2, nil
}
person1LocusBasePairKnown, person1LocusBase1, person1LocusBase2, err := getPersonGenomeLocusValues(person1GenomeIdentifier, person1DiseaseAnalysisObject)
if (err != nil) { return err }
if (person1LocusBasePairKnown == false){
// Offspring's disease info for this locus on this genome pair is unknown
continue
}
person2LocusBasePairKnown, person2LocusBase1, person2LocusBase2, err := getPersonGenomeLocusValues(person2GenomeIdentifier, person2DiseaseAnalysisObject)
if (err != nil) { return err }
if (person2LocusBasePairKnown == false){
// Offspring's disease info for this locus on this genome pair is unknown
continue
}
offspringAverageRiskWeight, offspringOddsRatioIsKnown, offspringAverageOddsRatio, averageUnknownOddsRatiosWeightSum, err := GetOffspringPolygenicDiseaseLocusInfo(locusRiskWeightsMap, locusOddsRatiosMap, person1LocusBase1, person1LocusBase2, person2LocusBase1, person2LocusBase2)
if (err != nil) { return err }
newOffspringPolygenicDiseaseLocusInfo := geneticAnalysis.OffspringPolygenicDiseaseLocusInfo{
OffspringRiskWeight: offspringAverageRiskWeight,
OffspringUnknownOddsRatiosWeightSum: averageUnknownOddsRatiosWeightSum,
}
if (offspringOddsRatioIsKnown == true){
newOffspringPolygenicDiseaseLocusInfo.OffspringOddsRatioIsKnown = true
newOffspringPolygenicDiseaseLocusInfo.OffspringOddsRatio = offspringAverageOddsRatio
}
offspringDiseaseLociInfoMap[locusIdentifier] = newOffspringPolygenicDiseaseLocusInfo
// Now we add risk weight info for this locus
locusMinimumRiskWeight := locusObject.MinimumRiskWeight
locusMaximumRiskWeight := locusObject.MaximumRiskWeight
minimumPossibleRiskWeightSum += locusMinimumRiskWeight
maximumPossibleRiskWeightSum += locusMaximumRiskWeight
summedRiskWeights += offspringAverageRiskWeight
}
numberOfLociTested := len(offspringDiseaseLociInfoMap)
if (numberOfLociTested == 0){
// We have no information about this genome pair's disease risk
// We don't add this genome pair's disease info to the diseaseInfoMap
return nil
}
genomePairDiseaseRiskScore, err := helpers.ScaleNumberProportionally(true, summedRiskWeights, minimumPossibleRiskWeightSum, maximumPossibleRiskWeightSum, 0, 10)
if (err != nil) { return err }
newOffspringGenomePairPolygenicDiseaseInfo := geneticAnalysis.OffspringGenomePairPolygenicDiseaseInfo{
NumberOfLociTested: numberOfLociTested,
OffspringRiskScore: genomePairDiseaseRiskScore,
LociInfoMap: offspringDiseaseLociInfoMap,
}
genomePairIdentifier := helpers.JoinTwo16ByteArrays(person1GenomeIdentifier, person2GenomeIdentifier)
offspringPolygenicDiseaseInfoMap[genomePairIdentifier] = newOffspringGenomePairPolygenicDiseaseInfo
return nil
}
err = addGenomePairDiseaseInfoToDiseaseMap(pair1Person1GenomeIdentifier, pair1Person2GenomeIdentifier)
if (err != nil) { return false, "", err }
if (genomePair2Exists == true){
err := addGenomePairDiseaseInfoToDiseaseMap(pair2Person1GenomeIdentifier, pair2Person2GenomeIdentifier)
if (err != nil) { return false, "", err }
}
newOffspringPolygenicDiseaseInfoObject := geneticAnalysis.OffspringPolygenicDiseaseInfo{
PolygenicDiseaseInfoMap: offspringPolygenicDiseaseInfoMap,
}
if (len(offspringPolygenicDiseaseInfoMap) >= 2){
// We check for conflicts
// Conflicts are only possible if two genome pairs with disease info for this disease exist
checkIfConflictExists := func()(bool, error){
numberOfLociTested := 0
offspringRiskScore := 0
offspringLociInfoMap := make(map[[3]byte]geneticAnalysis.OffspringPolygenicDiseaseLocusInfo)
firstItemReached := false
for _, genomePairDiseaseInfoObject := range offspringPolygenicDiseaseInfoMap{
genomePairNumberOfLociTested := genomePairDiseaseInfoObject.NumberOfLociTested
genomePairOffspringRiskScore := genomePairDiseaseInfoObject.OffspringRiskScore
genomePairLociInfoMap := genomePairDiseaseInfoObject.LociInfoMap
if (firstItemReached == false){
numberOfLociTested = genomePairNumberOfLociTested
offspringRiskScore = genomePairOffspringRiskScore
offspringLociInfoMap = genomePairLociInfoMap
firstItemReached = true
continue
}
if (numberOfLociTested != genomePairNumberOfLociTested){
return true, nil
}
if (offspringRiskScore != genomePairOffspringRiskScore){
return true, nil
}
areEqual := maps.Equal(offspringLociInfoMap, genomePairLociInfoMap)
if (areEqual == false){
// A conflict exists
return true, nil
}
}
return false, nil
}
conflictExists, err := checkIfConflictExists()
if (err != nil) { return false, "", err }
newOffspringPolygenicDiseaseInfoObject.ConflictExists = conflictExists
}
offspringPolygenicDiseasesMap[diseaseName] = newOffspringPolygenicDiseaseInfoObject
}
newCoupleAnalysis.PolygenicDiseasesMap = offspringPolygenicDiseasesMap
// Step 3: Traits
traitObjectsList, err := traits.GetTraitObjectsList()
if (err != nil) { return false, "", err }
// Map Structure: Trait Name -> Trait Info Object
offspringTraitsMap := make(map[string]geneticAnalysis.OffspringTraitInfo)
for _, traitObject := range traitObjectsList{
traitName := traitObject.TraitName
person1TraitAnalysisObject, err := getPersonTraitAnalysis(person1GenomesWithMetadataList, traitObject)
if (err != nil) { return false, "", err }
person2TraitAnalysisObject, err := getPersonTraitAnalysis(person2GenomesWithMetadataList, traitObject)
if (err != nil) { return false, "", err }
// This map stores the trait info for each genome pair
// Map Structure: Genome Pair Identifier -> OffspringGenomePairTraitInfo
offspringTraitInfoMap := make(map[[32]byte]geneticAnalysis.OffspringGenomePairTraitInfo)
// This will add the offspring trait information for the provided genome pair to the offspringTraitInfoMap
addGenomePairTraitInfoToOffspringMap := func(person1GenomeIdentifier [16]byte, person2GenomeIdentifier [16]byte)error{
person1TraitInfoMap := person1TraitAnalysisObject.TraitInfoMap
person2TraitInfoMap := person2TraitAnalysisObject.TraitInfoMap
person1GenomeTraitInfoObject, exists := person1TraitInfoMap[person1GenomeIdentifier]
if (exists == false){
// This person has no genome values for any loci for this trait
// No predictions are possible
return nil
}
person2GenomeTraitInfoObject, exists := person2TraitInfoMap[person2GenomeIdentifier]
if (exists == false){
// This person has no genome values for any loci for this trait
// No predictions are possible
return nil
}
person1LocusValuesMap := person1GenomeTraitInfoObject.LocusValuesMap
person2LocusValuesMap := person2GenomeTraitInfoObject.LocusValuesMap
anyRulesTested, numberOfRulesTested, offspringProbabilityOfPassingRulesMap, offspringAverageOutcomeScoresMap, err := GetOffspringTraitInfo(traitObject, person1LocusValuesMap, person2LocusValuesMap)
if (err != nil) { return err }
if (anyRulesTested == false){
// No rules were tested for this trait
// We will not add anything to the trait info map for this genome pair
return nil
}
newOffspringGenomePairTraitInfoObject := geneticAnalysis.OffspringGenomePairTraitInfo{
NumberOfRulesTested: numberOfRulesTested,
OffspringAverageOutcomeScoresMap: offspringAverageOutcomeScoresMap,
ProbabilityOfPassingRulesMap: offspringProbabilityOfPassingRulesMap,
}
genomePairIdentifier := helpers.JoinTwo16ByteArrays(person1GenomeIdentifier, person2GenomeIdentifier)
offspringTraitInfoMap[genomePairIdentifier] = newOffspringGenomePairTraitInfoObject
return nil
}
err = addGenomePairTraitInfoToOffspringMap(pair1Person1GenomeIdentifier, pair1Person2GenomeIdentifier)
if (err != nil) { return false, "", err }
if (genomePair2Exists == true){
err := addGenomePairTraitInfoToOffspringMap(pair2Person1GenomeIdentifier, pair2Person2GenomeIdentifier)
if (err != nil) { return false, "", err }
}
newOffspringTraitInfoObject := geneticAnalysis.OffspringTraitInfo{
TraitInfoMap: offspringTraitInfoMap,
}
if (len(offspringTraitInfoMap) >= 2){
// We check for conflicts
// Conflicts are only possible if two genome pairs exist with information about the trait
checkIfConflictExists := func()(bool, error){
// We check for conflicts between each genome pair's outcome scores and trait rules maps
offspringAverageOutcomeScoresMap := make(map[string]float64)
offspringProbabilityOfPassingRulesMap := make(map[[3]byte]int)
firstItemReached := false
for _, genomePairTraitInfoObject := range offspringTraitInfoMap{
currentOffspringAverageOutcomeScoresMap := genomePairTraitInfoObject.OffspringAverageOutcomeScoresMap
currentProbabilityOfPassingRulesMap := genomePairTraitInfoObject.ProbabilityOfPassingRulesMap
if (firstItemReached == false){
offspringAverageOutcomeScoresMap = currentOffspringAverageOutcomeScoresMap
offspringProbabilityOfPassingRulesMap = currentProbabilityOfPassingRulesMap
firstItemReached = true
continue
}
areEqual := maps.Equal(offspringAverageOutcomeScoresMap, currentOffspringAverageOutcomeScoresMap)
if (areEqual == false){
return true, nil
}
areEqual = maps.Equal(offspringProbabilityOfPassingRulesMap, currentProbabilityOfPassingRulesMap)
if (areEqual == false){
return true, nil
}
}
return false, nil
}
conflictExists, err := checkIfConflictExists()
if (err != nil) { return false, "", err }
newOffspringTraitInfoObject.ConflictExists = conflictExists
}
offspringTraitsMap[traitName] = newOffspringTraitInfoObject
}
newCoupleAnalysis.TraitsMap = offspringTraitsMap
analysisBytes, err := encoding.EncodeMessagePackBytes(newCoupleAnalysis)
if (err != nil) { return false, "", err }
analysisString := string(analysisBytes)
return true, analysisString, nil
}
// This function will return a list of 100 prospective offspring genomes
// Each genome represents an equal-probability offspring genome from both people's genomes
// This function takes into account the effects of genetic linkage
// Any locations which do not exist in both people's genomes will not be included
//Outputs:
// -bool: Any locus value exists between both users
// -[]map[int64]locusValue.LocusValue
// -error
func getProspectiveOffspringGenomesList(lociList []int64, person1LociMap map[int64]locusValue.LocusValue, person2LociMap map[int64]locusValue.LocusValue)(bool, []map[int64]locusValue.LocusValue, error){
// -We use randomness to generate the offspring genomes
// -We want the results to be the same for each pair of people each time, so we have to seed our randomness generator
// -This is necessary so that two people's analysis results do not change every time
// -Instead, the same 2 people will produce the exact same result every time
pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))
//Outputs:
// -[]int64: A list of random breakpoints for this chromosome that are statistically accurate
// -error
getRandomChromosomeBreakpoints := func(chromosome int)([]int64, error){
getChromosomeLength := func()(int64, error){
// Approximate number of base pairs in each chromosome taken from: https://www.ncbi.nlm.nih.gov/books/NBK557784/
switch chromosome{
case 1:{
return 249000000, nil
}
case 2:{
return 243000000, nil
}
case 3:{
return 200000000, nil
}
case 4:{
return 192000000, nil
}
case 5:{
return 181000000, nil
}
case 6:{
return 170000000, nil
}
case 7:{
return 158000000, nil
}
case 8:{
return 146000000, nil
}
case 9:{
return 140000000, nil
}
case 10:{
return 135000000, nil
}
case 11:{
return 135000000, nil
}
case 12:{
return 132000000, nil
}
case 13:{
return 114000000, nil
}
case 14:{
return 106000000, nil
}
case 15:{
return 100000000, nil
}
case 16:{
return 89000000, nil
}
case 17:{
return 79000000, nil
}
case 18:{
return 76000000, nil
}
case 19:{
return 64000000, nil
}
case 20:{
return 62000000, nil
}
case 21:{
return 47000000, nil
}
case 22:{
return 50000000, nil
}
}
chromosomeString := helpers.ConvertIntToString(chromosome)
return 0, errors.New("getRandomChromosomeBreakpoints called with invalid chromosome: " + chromosomeString)
}
chromosomeLength, err := getChromosomeLength()
if (err != nil) { return nil, err }
listOfRandomBreakpoints := make([]int64, 0)
// TODO: Take into account different recombination rate for each chromosome
// TODO: There are also breakpoint hotspots which we need to account for
// TODO: I read somewhere that recombination break points are less likely to occur within genes,
// meaning they are more likely to occur at the gene boundaries (codons)
// We step by 1,000,000 each time
// It would be more realistic if we did it in 1 integer increments, but it would be slower
for position := int64(0); position <= chromosomeLength; position += 1000000{
//From Wikipedia:
// A centimorgan (abbreviated cM) is a unit for measuring genetic linkage.
// It is defined as the distance between chromosome positions (loci) for which the expected
// average number of intervening chromosomal crossovers in a single generation is 0.01.
// One centimorgan corresponds to about 1 million base pairs in humans on average
//
// A chromosomal crossover == recombination breakpoint
//
// For every 1,000,000 base pairs, there is a 0.01 probability that there is a breakpoint
randomFloat := pseudorandomNumberGenerator.Float64()
if (randomFloat <= 0.01){
// This has a 0.01, or 1% probability of being true
listOfRandomBreakpoints = append(listOfRandomBreakpoints, position)
}
}
return listOfRandomBreakpoints, nil
}
// Map Structure: rsID -> Locus Value
offspringGenomesList := make([]map[int64]locusValue.LocusValue, 0)
for i:=0; i < 100; i++{
// This map stores the chromosome breakpoints for person1
// Map Structure: Chromosome -> List of breakpoints
person1ChromosomeBreakpointsMap := make(map[int][]int64)
// This map stores the chromosome breakpoints for person2
// Map Structure: Chromosome -> List of breakpoints
person2ChromosomeBreakpointsMap := make(map[int][]int64)
// This stores the locus values for this prospective offspring
// Map Structure: rsID -> Locus Value
prospectiveOffspringGenome := make(map[int64]locusValue.LocusValue)
for _, rsID := range lociList{
//Outputs:
// -bool: Allele is known
// -string: Locus base
// -error
getPersonAllele := func(personLociMap map[int64]locusValue.LocusValue, personBreakpointsMap map[int][]int64)(bool, string, error){
personLocusValue, exists := personLociMap[rsID]
if (exists == false){
return false, "", nil
}
personLocusBase1 := personLocusValue.Base1Value
personLocusBase2 := personLocusValue.Base1Value
personLocusIsPhased := personLocusValue.LocusIsPhased
if (personLocusIsPhased == false){
// Breakpoints are unnecessary
// We either choose base 1 or 2
randomBool := helpers.GetRandomBool()
if (randomBool == true){
return true, personLocusBase1, nil
}
return true, personLocusBase2, nil
}
// We have a phased locus
// We figure out which allele to use by seeing which allele gets inherited from our random breakpoints list
// We figure out the chromosome and position of this locus
locusMetadataExists, locusMetadataObject, err := locusMetadata.GetLocusMetadata(rsID)
if (err != nil) { return false, "", err }
if (locusMetadataExists == false){
rsIDString := helpers.ConvertInt64ToString(rsID)
return false, "", errors.New("getProspectiveOffspringGenomesList called with unknown rsID: " + rsIDString)
}
locusPosition := locusMetadataObject.Position
locusChromosome := locusMetadataObject.Chromosome
getPersonChromosomeBreakpointsList := func()([]int64, error){
breakpointsList, exists := personBreakpointsMap[locusChromosome]
if (exists == true){
return breakpointsList, nil
}
// We have to create a new breakpoints list
newBreakpointsList, err := getRandomChromosomeBreakpoints(locusChromosome)
if (err != nil) { return nil, err }
personBreakpointsMap[locusChromosome] = newBreakpointsList
return newBreakpointsList, nil
}
personBreakpointsList, err := getPersonChromosomeBreakpointsList()
if (err != nil) { return false, "", err }
getLocusListIndex := func()int{
for index, breakpoint := range personBreakpointsList{
if (int64(locusPosition) <= breakpoint){
return index
}
}
index := len(personBreakpointsList)
// This is reached if the final breakpoint in the list is less than the locus's position, or if there were no breakpoints
return index
}
locusListIndex := getLocusListIndex()
if (locusListIndex%2 == 0){
return true, personLocusBase1, nil
}
return true, personLocusBase2, nil
}
person1AlleleIsKnown, person1Allele, err := getPersonAllele(person1LociMap, person1ChromosomeBreakpointsMap)
if (err != nil) { return false, nil, err }
if (person1AlleleIsKnown == false){
continue
}
person2AlleleIsKnown, person2Allele, err := getPersonAllele(person2LociMap, person2ChromosomeBreakpointsMap)
if (err != nil) { return false, nil, err }
if (person2AlleleIsKnown == false){
continue
}
offspringLocusValue := locusValue.LocusValue{
Base1Value: person1Allele,
Base2Value: person2Allele,
LocusIsPhased: true,
}
prospectiveOffspringGenome[rsID] = offspringLocusValue
}
if (len(prospectiveOffspringGenome) == 0){
// We don't have any locations at which both people's genomes contain a locus value.
return false, nil, nil
}
offspringGenomesList = append(offspringGenomesList, prospectiveOffspringGenome)
}
return true, offspringGenomesList, nil
}
// We also use this function when calculating offspring probabilities between users in viewProfileGui.go
//Outputs:
// -bool: Probability offspring has disease is known
// -int: Percentage probability offspring has disease (0-100)
// -bool: Probability offspring has variant is known
// -int: Percentage probability offspring has variant (0-100)
// -error
func GetOffspringMonogenicDiseaseProbabilities(dominantOrRecessive string, person1ProbabilityIsKnown bool, person1WillPassVariantPercentageProbability int, person2ProbabilityIsKnown bool, person2WillPassVariantPercentageProbability int)(bool, int, bool, int, error){
if (dominantOrRecessive != "Dominant" && dominantOrRecessive != "Recessive"){
return false, 0, false, 0, errors.New("GetOffspringMonogenicDiseaseProbabilities called with invalid dominantOrRecessive: " + dominantOrRecessive)
}
if (person1ProbabilityIsKnown == false && person2ProbabilityIsKnown == false){
return false, 0, false, 0, nil
}
if (person1ProbabilityIsKnown == true){
if (person1WillPassVariantPercentageProbability < 0 || person1WillPassVariantPercentageProbability > 100){
return false, 0, false, 0, errors.New("GetOffspringMonogenicDiseaseProbabilities called with invalid person1WillPassVariantProbability")
}
}
if (person2ProbabilityIsKnown == true){
if (person2WillPassVariantPercentageProbability < 0 || person2WillPassVariantPercentageProbability > 100){
return false, 0, false, 0, errors.New("GetOffspringMonogenicDiseaseProbabilities called with invalid person2WillPassVariantProbability")
}
}
if (person1ProbabilityIsKnown == false || person2ProbabilityIsKnown == false){
// Only 1 of the person's probabilities are known
getPersonWhoHasVariantProbabilityOfPassingIt := func()int{
if (person1ProbabilityIsKnown == true){
return person1WillPassVariantPercentageProbability
}
return person2WillPassVariantPercentageProbability
}
personWhoHasVariantProbabilityOfPassingIt := getPersonWhoHasVariantProbabilityOfPassingIt()
if (personWhoHasVariantProbabilityOfPassingIt == 100){
if (dominantOrRecessive == "Dominant"){
// We know the offspring will have the disease and will have a variant
return true, 100, true, 100, nil
}
//dominantOrRecessive == "Recessive"
// We don't know if the offspring will have the disease, but we know they will have a variant
return false, 0, true, 100, nil
}
if (personWhoHasVariantProbabilityOfPassingIt == 0){
if (dominantOrRecessive == "Recessive"){
// We know the offspring will not have the disease, but we don't know if they will have a variant
return true, 0, false, 0, nil
}
}
// We don't know the probabilities that the offspring will have the disease or if they will have a variant
return false, 0, false, 0, nil
}
person1WillPassVariantProbability := float64(person1WillPassVariantPercentageProbability)/100
person2WillPassVariantProbability := float64(person2WillPassVariantPercentageProbability)/100
// The probability offspring has a variant = the probability that either parent passes a variant (inclusive or)
// We find the probability of the offspring having a monogenic disease variant as follows:
// P(A U B) = P(A) + P(B) - P(A ∩ B)
// (Probability of person 1 passing a variant) + (Probability of person 2 passing a variant) - (Probability of offspring having disease)
// A person with a variant may have the disease, or just be a carrier.
probabilityOffspringHasVariant := person1WillPassVariantProbability + person2WillPassVariantProbability - (person1WillPassVariantProbability * person2WillPassVariantProbability)
if (dominantOrRecessive == "Dominant"){
// The probability of having the monogenic disease is the same as the probability of having a variant
percentageProbabilityOffspringHasVariant := int(probabilityOffspringHasVariant * 100)
return true, percentageProbabilityOffspringHasVariant, true, percentageProbabilityOffspringHasVariant, nil
}
// We find the probability of the offspring having the mongenic disease as follows:
// P(A and B) = P(A) * P(B)
// (Probability of person 1 Passing a variant) * (Probability of person 2 passing a variant)
probabilityOffspringHasDisease := person1WillPassVariantProbability * person2WillPassVariantProbability
percentageProbabilityOffspringHasDisease := probabilityOffspringHasDisease * 100
percentageProbabilityOffspringHasVariant := probabilityOffspringHasVariant * 100
// This conversion remove any digits after the radix point
// This will not result in any false 0% values, an example being 0.9% becoming 0%
// This is because the lowest non-zero probability a person can have for passing a variant is 50%
// Thus, the lowest non-zero probability of an offspring having a disease is 25%
percentageProbabilityOffspringHasDiseaseInt := int(percentageProbabilityOffspringHasDisease)
percentageProbabilityOffspringHasVariantInt := int(percentageProbabilityOffspringHasVariant)
return true, percentageProbabilityOffspringHasDiseaseInt, true, percentageProbabilityOffspringHasVariantInt, nil
}
//Outputs:
// -int: Offspring average risk weight
// -bool: Odds ratio is known
// -float64: Offspring average odds ratio
// -int: Offspring unknown odds ratios weight sum
// -error
func GetOffspringPolygenicDiseaseLocusInfo(locusRiskWeightsMap map[string]int, locusOddsRatiosMap map[string]float64, person1LocusBase1 string, person1LocusBase2 string, person2LocusBase1 string, person2LocusBase2 string)(int, bool, float64, int, error){
// We create the 4 options for the offspring's bases at this locus
offspringBasePairOutcome1 := person1LocusBase1 + ";" + person2LocusBase1
offspringBasePairOutcome2 := person1LocusBase2 + ";" + person2LocusBase2
offspringBasePairOutcome3 := person1LocusBase1 + ";" + person2LocusBase2
offspringBasePairOutcome4 := person1LocusBase2 + ";" + person2LocusBase1
baseOutcomesList := []string{offspringBasePairOutcome1, offspringBasePairOutcome2, offspringBasePairOutcome3, offspringBasePairOutcome4}
summedRiskWeight := 0
numberOfSummedOddsRatios := 0
summedOddsRatios := float64(0)
numberOfSummedUnknownOddsRatioWeights := 0
summedUnknownOddsRatioWeights := 0
for _, outcomeBasePair := range baseOutcomesList{
isValid := verifyBasePair(outcomeBasePair)
if (isValid == false){
return 0, false, 0, 0, errors.New("GetOffspringPolygenicDiseaseLocusInfo called with invalid locus base pair: " + outcomeBasePair)
}
offspringOutcomeRiskWeight, exists := locusRiskWeightsMap[outcomeBasePair]
if (exists == false){
// We do not know the risk weight for this base pair
// We treat this as a 0 risk for both weight and odds ratio
// No effect on risk is represented as an odds ratio of 1
summedOddsRatios += 1
numberOfSummedOddsRatios += 1
continue
}
summedRiskWeight += offspringOutcomeRiskWeight
offspringOutcomeOddsRatio, exists := locusOddsRatiosMap[outcomeBasePair]
if (exists == false){
// This particular outcome has no known odds ratio
// We add it to the unknown odds ratio weights sum
summedUnknownOddsRatioWeights += offspringOutcomeRiskWeight
numberOfSummedUnknownOddsRatioWeights += 1
} else {
summedOddsRatios += offspringOutcomeOddsRatio
numberOfSummedOddsRatios += 1
}
}
averageRiskWeight := summedRiskWeight/4
getAverageUnknownOddsRatiosWeightSum := func()int{
if (numberOfSummedUnknownOddsRatioWeights == 0){
return 0
}
averageUnknownOddsRatiosWeightSum := summedUnknownOddsRatioWeights/numberOfSummedUnknownOddsRatioWeights
return averageUnknownOddsRatiosWeightSum
}
averageUnknownOddsRatiosWeightSum := getAverageUnknownOddsRatiosWeightSum()
if (numberOfSummedOddsRatios == 0){
return averageRiskWeight, false, 0, averageUnknownOddsRatiosWeightSum, nil
}
averageOddsRatio := summedOddsRatios/float64(numberOfSummedOddsRatios)
return averageRiskWeight, true, averageOddsRatio, averageUnknownOddsRatiosWeightSum, nil
}
//Outputs:
// -bool: Any rules tested (if false, no offspring trait information is known)
// -int: Number of rules tested
// -map[[3]byte]int: Offspring probability of passing rules map
// Map Structure: Rule identifier -> Offspring probability of passing rule (1-100)
// -map[string]float64: Offspring average outcome scores map
// Map Structure: Outcome Name -> Offspring average outcome score
// -error
func GetOffspringTraitInfo(traitObject traits.Trait, person1LocusValuesMap map[int64]locusValue.LocusValue, person2LocusValuesMap map[int64]locusValue.LocusValue)(bool, int, map[[3]byte]int, map[string]float64, error){
// First, we create 100 prospective offspring genomes.
traitLociList := traitObject.LociList
anyLocusValueExists, prospectiveOffspringGenomesList, err := getProspectiveOffspringGenomesList(traitLociList, person1LocusValuesMap, person2LocusValuesMap)
if (err != nil) { return false, 0, nil, nil, err }
if (anyLocusValueExists == false){
return false, 0, nil, nil, nil
}
traitRulesList := traitObject.RulesList
// Map Structure: Rule Identifier -> Number of offspring who pass the rule (out of 100 prospective offspring)
offspringPassesRulesCountMap := make(map[[3]byte]int)
// We use this map to keep track of the rules for which we know every offspring's passes-rule status
// Map Structure: Rule Identifier -> Rule Object
offspringRulesWithKnownStatusMap := make(map[[3]byte]traits.TraitRule)
for offspringIndex, offspringGenomeMap := range prospectiveOffspringGenomesList{
// We iterate through rules to determine genome pair trait info
for _, ruleObject := range traitRulesList{
ruleIdentifierHex := ruleObject.RuleIdentifier
ruleIdentifier, err := encoding.DecodeHexStringTo3ByteArray(ruleIdentifierHex)
if (err != nil) { return false, 0, nil, nil, err }
if (offspringIndex != 0){
_, exists := offspringRulesWithKnownStatusMap[ruleIdentifier]
if (exists == false){
// We already tried to check a previous offspring's passes-rule status for this rule
// We know that the offspring's passes-rule status will be unknown for every prospective offspring
continue
}
}
// This is a list that describes the locus rsids and their values that must be fulfilled to pass the rule
ruleLocusObjectsList := ruleObject.LociList
//Outputs:
// -bool: Offspring passes rule is known
// -bool: Offspring passes rule
getOffspringPassesRuleStatus := func()(bool, bool){
// If any rule locus status is unknown, then we consider the offspring-passes-rule status to be unknown,
// unless we know that there is a rule that the offspring does not pass
anyRuleIsUnknown := false
for _, ruleLocusObject := range ruleLocusObjectsList{
locusRSID := ruleLocusObject.LocusRSID
locusRequiredBasePairsList := ruleLocusObject.BasePairsList
offspringLocusValue, exists := offspringGenomeMap[locusRSID]
if (exists == false){
anyRuleIsUnknown = true
// We keep searching to see if there are any rules we know the offspring does not pass
continue
}
offspringBase1 := offspringLocusValue.Base1Value
offspringBase2 := offspringLocusValue.Base2Value
offspringBasePair := offspringBase1 + ";" + offspringBase2
offspringPassesRuleLocus := slices.Contains(locusRequiredBasePairsList, offspringBasePair)
if (offspringPassesRuleLocus == false){
// The offspring does not pass this rule locus
// Thus, the offspring does not pass the rule
return true, false
}
}
if (anyRuleIsUnknown == true){
// We don't know if the offspring passes the rule
return false, false
}
// The offspring passes the rule
return true, true
}
offspringPassesRuleIsKnown, offspringPassesRule := getOffspringPassesRuleStatus()
if (offspringPassesRuleIsKnown == false){
continue
}
offspringRulesWithKnownStatusMap[ruleIdentifier] = ruleObject
if (offspringPassesRule == true){
offspringPassesRulesCountMap[ruleIdentifier] += 1
}
}
}
// Map Structure: Rule Identifier -> Offspring Probability Of Passing Rule
// The map value stores the probability that the offspring will pass the rule
// This is a number between 0-100%
offspringProbabilityOfPassingRulesMap := make(map[[3]byte]int)
// Map Structure: Outcome Name -> Outcome Score
// Example: "Intolerant" -> 2.5
offspringAverageOutcomeScoresMap := make(map[string]float64)
for ruleIdentifier, ruleObject := range offspringRulesWithKnownStatusMap{
//Output:
// -int: Offspring probability of passing rule (0-100%)
getOffspringPercentageProbabilityOfPassingRule := func()int{
numberOfOffspringWhoPassRule, exists := offspringPassesRulesCountMap[ruleIdentifier]
if (exists == false){
// None of the offspring passed the rule
return 0
}
// There are 100 tested offspring
// Thus, the percentage of offspring who passed the rule is the same as the number of offspring who passed the rule
// The probability of the offspring passing the rule is the same as the percentage of offspring who passed the rule
return numberOfOffspringWhoPassRule
}
offspringPercentageProbabilityOfPassingRule := getOffspringPercentageProbabilityOfPassingRule()
offspringProbabilityOfPassingRulesMap[ruleIdentifier] = offspringPercentageProbabilityOfPassingRule
// This is the 0 - 1 probability value
offspringProbabilityOfPassingRule := float64(offspringPercentageProbabilityOfPassingRule)/100
ruleOutcomePointsMap := ruleObject.OutcomePointsMap
for outcomeName, outcomePointsEffect := range ruleOutcomePointsMap{
pointsToAdd := float64(outcomePointsEffect) * offspringProbabilityOfPassingRule
offspringAverageOutcomeScoresMap[outcomeName] += pointsToAdd
}
}
numberOfRulesTested := len(offspringProbabilityOfPassingRulesMap)
if (numberOfRulesTested == 0){
return false, 0, nil, nil, nil
}
traitOutcomesList := traitObject.OutcomesList
// We add all outcomes for which there were no points
for _, traitOutcome := range traitOutcomesList{
_, exists := offspringAverageOutcomeScoresMap[traitOutcome]
if (exists == false){
offspringAverageOutcomeScoresMap[traitOutcome] = 0
}
}
return true, numberOfRulesTested, offspringProbabilityOfPassingRulesMap, offspringAverageOutcomeScoresMap, nil
}
// This function will retrieve the base pair of the locus from the input genome map
// We need this because each rsID has aliases, so we must sometimes check those aliases to find locus values
//
// Outputs:
// -bool: Valid base pair value found
// -string: Base 1 Value (Nucleotide base for the SNP)
// -string: Base 2 Value (Nucleotide base for the SNP)
// -bool: Locus base pair is phased
// -error
func getGenomeLocusBasePair(inputGenomeMap map[int64]locusValue.LocusValue, locusRSID int64)(bool, string, string, bool, error){
// Outputs:
// -bool: Locus value found
// -locusValue.LocusValue
// -error
getLocusValue := func()(bool, locusValue.LocusValue, error){
currentLocusValue, exists := inputGenomeMap[locusRSID]
if (exists == true){
return true, currentLocusValue, nil
}
// We check for aliases
anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(locusRSID)
if (err != nil) { return false, locusValue.LocusValue{}, err }
if (anyAliasesExist == false){
return false, locusValue.LocusValue{}, nil
}
for _, rsidAlias := range rsidAliasesList{
currentLocusValue, exists := inputGenomeMap[rsidAlias]
if (exists == true){
return true, currentLocusValue, nil
}
}
return false, locusValue.LocusValue{}, nil
}
locusValueFound, locusValueObject, err := getLocusValue()
if (err != nil) { return false, "", "", false, err }
if (locusValueFound == false){
return false, "", "", false, nil
}
base1Value := locusValueObject.Base1Value
base2Value := locusValueObject.Base2Value
locusIsPhased := locusValueObject.LocusIsPhased
return true, base1Value, base2Value, locusIsPhased, nil
}
//Outputs:
// -geneticAnalysis.PersonMonogenicDiseaseInfo: Monogenic disease analysis object
// -error
func getPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, diseaseObject monogenicDiseases.MonogenicDisease)(geneticAnalysis.PersonMonogenicDiseaseInfo, error){
emptyDiseaseInfoObject := geneticAnalysis.PersonMonogenicDiseaseInfo{}
dominantOrRecessive := diseaseObject.DominantOrRecessive
variantsList := diseaseObject.VariantsList
// We use this map to keep track of which RSIDs corresponds to each variant
// We also use it to have a map of all variants for the disease
// Map Structure: Variant Identifier -> []rsID
variantRSIDsMap := make(map[[3]byte][]int64)
// This map stores all rsIDs for this monogenic disease
// These are locations in the disease's gene which, if mutated, are known to cause the disease
// We use this map to avoid duplicate rsIDs, because one rsID can have multiple variants which belong to it
// We also store all alias rsIDs in this map
allRSIDsMap := make(map[int64]struct{})
for _, variantObject := range variantsList{
variantIdentifierHex := variantObject.VariantIdentifier
variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex)
if (err != nil) { return emptyDiseaseInfoObject, err }
variantRSID := variantObject.VariantRSID
variantRSIDsList := []int64{variantRSID}
// We add aliases to variantRSIDsList
anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(variantRSID)
if (err != nil) { return emptyDiseaseInfoObject, err }
if (anyAliasesExist == true){
variantRSIDsList = append(variantRSIDsList, rsidAliasesList...)
}
variantRSIDsMap[variantIdentifier] = variantRSIDsList
for _, rsID := range variantRSIDsList{
allRSIDsMap[rsID] = struct{}{}
}
}
// Now we create a new map without any rsID aliases
// Each rsID in this map represents a unique locus on the genome
// Each rsID may have aliases, but they are not included in this map
allUniqueRSIDsMap := make(map[int64]struct{})
for rsID, _ := range allRSIDsMap{
anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(rsID)
if (err != nil) { return emptyDiseaseInfoObject, err }
if (anyAliasesExist == false){
allUniqueRSIDsMap[rsID] = struct{}{}
continue
}
// We see if we already added an alias of this rsID to the map
checkIfAliasAlreadyExists := func()bool{
for _, rsIDAlias := range rsidAliasesList{
_, exists := allUniqueRSIDsMap[rsIDAlias]
if (exists == true){
return true
}
}
return false
}
aliasAlreadyExists := checkIfAliasAlreadyExists()
if (aliasAlreadyExists == true){
// We already added this alias
continue
}
allUniqueRSIDsMap[rsID] = struct{}{}
}
// Map Structure: Genome Identifier -> PersonGenomeMonogenicDiseaseInfo
monogenicDiseaseInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomeMonogenicDiseaseInfo)
for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{
genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier
genomeMap := genomeWithMetadataObject.GenomeMap
// This stores all variant info for this genome
// Map Structure: Variant Identifier -> PersonGenomeMonogenicDiseaseVariantInfo
variantsInfoMap := make(map[[3]byte]geneticAnalysis.PersonGenomeMonogenicDiseaseVariantInfo)
for _, variantObject := range variantsList{
variantIdentifierHex := variantObject.VariantIdentifier
variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex)
if (err != nil) { return emptyDiseaseInfoObject, err }
variantRSID := variantObject.VariantRSID
basePairValueFound, base1Value, base2Value, locusIsPhased, err := getGenomeLocusBasePair(genomeMap, variantRSID)
if (err != nil) { return emptyDiseaseInfoObject, err }
if (basePairValueFound == false){
// This genome does not contain info for this variant
// We skip it
continue
}
// This genome has at least 1 variant
variantDefectiveBase := variantObject.DefectiveBase
getBaseIsVariantMutationBool := func(inputBase string)bool{
if (inputBase == variantDefectiveBase){
return true
}
// Base could be mutated to a different unhealthy base
// That mutation could be a neutral/healthier change
// We only care about this specific variant
return false
}
base1IsDefective := getBaseIsVariantMutationBool(base1Value)
base2IsDefective := getBaseIsVariantMutationBool(base2Value)
newDiseaseVariantInfoObject := geneticAnalysis.PersonGenomeMonogenicDiseaseVariantInfo{
Base1HasVariant: base1IsDefective,
Base2HasVariant: base2IsDefective,
LocusIsPhased: locusIsPhased,
}
variantsInfoMap[variantIdentifier] = newDiseaseVariantInfoObject
//TODO: Add LocusIsPhased to readGeneticAnalysis package
}
// We are done adding variant information for the genome
// Now we determine probability that user will pass a disease variant to offspring, and if the user has the disease
numberOfVariantsTested := len(variantsInfoMap)
if (numberOfVariantsTested == 0){
// We don't know anything about this genome's disease risk for this disease
// We won't add any information to the map
continue
}
// This stores the number of loci that were tested
// Each locus can have multiple potential variants
numberOfLociTested := 0
// This stores the number of tested loci that are phased
// A higher number means that the results are more potentially more accurate
// It is only more accurate if multiple heterozygous variants on seperate loci exist.
numberOfPhasedLoci := 0
for rsID, _ := range allUniqueRSIDsMap{
locusValueExists, _, _, locusIsPhased, err := getGenomeLocusBasePair(genomeMap, rsID)
if (err != nil) { return emptyDiseaseInfoObject, err }
if (locusValueExists == false){
continue
}
numberOfLociTested += 1
if (locusIsPhased == true){
numberOfPhasedLoci += 1
}
}
// Outputs:
// -bool: Person has disease
// -float64: Probability Person will pass a defect (variant) to offspring (0-1)
// -error
getPersonDiseaseInfo := func()(bool, float64, error){
// These variables are used to count the number of defective variants that exist on each chromosome
numberOfVariants_Chromosome1 := 0
numberOfVariants_Chromosome2 := 0
numberOfVariants_UnknownChromosome := 0
// We use this map to keep track of how many mutations exist for each rsID
// This allows us to know if 2 different variant mutations exist for a single rsID
// For example, base1 is a different deleterious mutation than base2
// If this ever happens, we know that the user has the disease,
// because both copies of the gene locus are defective.
rsidMutationsMap := make(map[int64]int)
for variantIdentifier, variantInfoObject := range variantsInfoMap{
locusIsPhasedStatus := variantInfoObject.LocusIsPhased
base1HasVariant := variantInfoObject.Base1HasVariant
base2HasVariant := variantInfoObject.Base2HasVariant
if (base1HasVariant == false && base2HasVariant == false){
// Neither chromosome contains the variant mutation.
continue
}
if (base1HasVariant == true && base2HasVariant == true){
// Both chromosomes contain the same variant mutation.
// Person has the disease.
// Person will definitely pass disease variant to offspring.
return true, 1, nil
}
// We know that this variant exists on 1 of the bases, but not both.
variantRSIDsList, exists := variantRSIDsMap[variantIdentifier]
if (exists == false){
return false, 0, errors.New("variantRSIDsMap missing variantIdentifier.")
}
for _, rsID := range variantRSIDsList{
rsidMutationsMap[rsID] += 1
}
if (locusIsPhasedStatus == true){
if (base1HasVariant == true){
numberOfVariants_Chromosome1 += 1
}
if (base2HasVariant == true){
numberOfVariants_Chromosome2 += 1
}
} else {
if (base1HasVariant == true || base2HasVariant == true){
numberOfVariants_UnknownChromosome += 1
}
}
}
totalNumberOfVariants := numberOfVariants_Chromosome1 + numberOfVariants_Chromosome2 + numberOfVariants_UnknownChromosome
if (totalNumberOfVariants == 0){
// Person does not have any disease variants.
// They do not have the disease, and have no chance of passing a disease variant
return false, 0, nil
}
// Now we check to see if there are any loci which have 2 different variants, one for each base
for _, numberOfMutations := range rsidMutationsMap{
if (numberOfMutations >= 2){
// Person has 2 mutations on the same location
// They must have the disease, and will definitely pass a variant to their offspring
return true, 1, nil
}
}
// At this point, we know that there are no homozygous variant mutations
// All variant mutations are heterozygous, meaning the other chromosome strand's base is healthy
//Outputs:
// -bool: Person has disease
getPersonHasDiseaseBool := func()bool{
if (dominantOrRecessive == "Dominant"){
// Only 1 variant is needed for the person to have the disease
// We know they have at least 1 variant
return true
}
// dominantOrRecessive == "Recessive"
if (totalNumberOfVariants == 1){
// There is only 1 variant in total.
// This single variant cannot exist on both chromosomes.
// The person does not have the disease
return false
}
// We know that there are at least 2 variants
if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){
// We know there is at least 1 variant mutation on each chromosome
// Therefore, the person has the disease
return true
}
if (numberOfVariants_UnknownChromosome == 0){
// We know that variants do not exist on both chromosomes, only on 1.
// Thus, the person does not have the disease
return false
}
// We know there are at least 2 variants
// We know there is at least 1 variant whose phase is unknown
// If all mutations are on the same chromosome, the person does not have the disease.
// If at least 1 mutation exists on each chromosome, the person does have the disease.
// Either way, we don't know enough to say if the person has the disease.
// We will report that they do not, because their genome does not conclusively say that they do.
// This is why phased genomes are useful and provide a more accurate reading
// TODO: Explain this to the user in the GUI
// We must explain that unphased genomes will not detect disease sometimes
return false
}
personHasDiseaseBool := getPersonHasDiseaseBool()
// Output:
// -float64: Probability person will pass a disease variant to their offspring (0-1)
getPersonWillPassVariantProbability := func()float64{
if (totalNumberOfVariants == 1){
// There is only 1 variant on any chromosome
// The probability of the person passing a variant is 50%.
return 0.5
}
// We know that there are at least 2 variants
if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){
// We know there is at least 1 variant mutation on each chromosome
// Therefore, the person will definitely pass a variant
return 1
}
if (numberOfVariants_UnknownChromosome == 0){
// We know that variants do not exist on both chromosomes, only on 1.
// Thus, the person has a 50% probability of passing a variant
return 0.5
}
// We know all variants are heterozygous
// From Wikipeia:
// The human genome contains somewhere between 19,000 and 20,000 protein-coding genes.
// These genes contain an average of 10 introns and the average size of an intron is about 6 kb (6,000 base pairs)
// This means that the average size of a protein-coding gene is about 62 kb (62,000 base pairs)
// The probability of a recombination breakpoint occurring within the gene is very small
// If there is 1 breakpoint every 100 million locations, on average, and each gene is 62,000 base pairs long,
// then the probability of a breakpoint occurring within a gene is 62,000/100,000,000 = 0.00062 = .062%
// Thus, we disregard the risk of a breakpoint occurring within a gene
// I also read somewhere that breakpoints are less likely to occurr within genes, which makes this likelihood even smaller
// At this point, we know there at at least 2 variants
// We know that at least 1 of the variants has an unknown phase
// We don't know if all of the variants belong to the same chromosome
// If variants exist on both chromosomes, then the probability of passing a variant is 100%
// If all variants exist on the same chromosome, then the probability of passing a variant is 50%
// We know there is at least a 50% chance of passing a variant, and possibly higher
return 0.5
}
personWillPassVariantProbability := getPersonWillPassVariantProbability()
return personHasDiseaseBool, personWillPassVariantProbability, nil
}
personHasDisease, probabilityPersonWillPassAnyVariant, err := getPersonDiseaseInfo()
if (err != nil) { return emptyDiseaseInfoObject, err }
percentageProbabilityPersonWillPassADiseaseVariant := int(probabilityPersonWillPassAnyVariant * 100)
diseaseAnalysisObject := geneticAnalysis.PersonGenomeMonogenicDiseaseInfo{
PersonHasDisease: personHasDisease,
NumberOfVariantsTested: numberOfVariantsTested,
NumberOfLociTested: numberOfLociTested,
NumberOfPhasedLoci: numberOfPhasedLoci,
ProbabilityOfPassingADiseaseVariant: percentageProbabilityPersonWillPassADiseaseVariant,
VariantsInfoMap: variantsInfoMap,
}
monogenicDiseaseInfoMap[genomeIdentifier] = diseaseAnalysisObject
}
personMonogenicDiseaseInfoObject := geneticAnalysis.PersonMonogenicDiseaseInfo{
MonogenicDiseaseInfoMap: monogenicDiseaseInfoMap,
}
if (len(monogenicDiseaseInfoMap) <= 1){
// We do not need to check for conflicts, there is only <=1 genome with disease information
// Nothing left to do. Analysis is complete.
return personMonogenicDiseaseInfoObject, nil
}
// We check for conflicts
getConflictExistsBool := func()(bool, error){
firstItemReached := false
personHasDisease := false
probabilityOfPassingAVariant := 0
for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{
currentGenomePersonHasDisease := currentGenomeDiseaseAnalysisObject.PersonHasDisease
currentGenomeProbabilityOfPassingAVariant := currentGenomeDiseaseAnalysisObject.ProbabilityOfPassingADiseaseVariant
if (firstItemReached == false){
personHasDisease = currentGenomePersonHasDisease
probabilityOfPassingAVariant = currentGenomeProbabilityOfPassingAVariant
firstItemReached = true
continue
}
if (currentGenomePersonHasDisease != personHasDisease){
return true, nil
}
if (currentGenomeProbabilityOfPassingAVariant != probabilityOfPassingAVariant){
return true, nil
}
}
// Now we test variants for conflicts
// We are only doing this to see if there are variants which one genome has and another doesn't
// For example, the analysis results say that you have a 50% chance of passing a variant for both genomes, but
// they have detected a different variant for each genome.
// This means that your real risk of passing a variant may actually be higher, and you are more likely to have the disease too
for variantIdentifier, _ := range variantRSIDsMap{
// Each variant base pair is either true/false, true/true, false/false, false/true
// Two different genomes have true/false and false/true, it will not count as a conflict
// If the locus is unphased, then there is no difference between true/false and false/true
// If the locus is phased, then this flip is only meaningful if it effects the probability of disease/passing a variant
// We already checked those probabilities for conflicts earlier
// Therefore, any flip is not considered a conflict
// We only care about conflicts where 1 genome says you have a variant and the other says you don't, or
// one says you have only 1 mutation and the other says you have 2 at that location
firstItemReached := false
base1HasVariant := false
base2HasVariant := false
for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{
variantsInfoMap := currentGenomeDiseaseAnalysisObject.VariantsInfoMap
variantInfoObject, exists := variantsInfoMap[variantIdentifier]
if (exists == false){
if (firstItemReached == true){
// A previous genome has information for this variant, and the current one does not
return true, nil
}
continue
}
currentBase1HasVariant := variantInfoObject.Base1HasVariant
currentBase2HasVariant := variantInfoObject.Base2HasVariant
if (firstItemReached == false){
base1HasVariant = currentBase1HasVariant
base2HasVariant = currentBase2HasVariant
firstItemReached = true
continue
}
if (base1HasVariant == currentBase1HasVariant && base2HasVariant == currentBase2HasVariant){
// No conflict exists
continue
}
if (base1HasVariant == currentBase2HasVariant && base2HasVariant == currentBase1HasVariant){
// We don't count this as a conflict
continue
}
// A conflict exists
return true, nil
}
}
return false, nil
}
conflictExists, err := getConflictExistsBool()
if (err != nil) { return emptyDiseaseInfoObject, err }
personMonogenicDiseaseInfoObject.ConflictExists = conflictExists
return personMonogenicDiseaseInfoObject, nil
}
//Outputs:
// -geneticAnalysis.PersonPolygenicDiseaseInfo
// -error
func getPersonPolygenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, diseaseObject polygenicDiseases.PolygenicDisease)(geneticAnalysis.PersonPolygenicDiseaseInfo, error){
// We use this when returning errors
emptyDiseaseInfoObject := geneticAnalysis.PersonPolygenicDiseaseInfo{}
diseaseLociList := diseaseObject.LociList
// This map stores the polygenic disease for each of the person's genomes
// Map Structure: Genome Identifier -> PersonGenomePolygenicDiseaseInfo
personPolygenicDiseaseInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomePolygenicDiseaseInfo)
// We construct polygenic disease probability info for each genome
for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{
genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier
genomeMap := genomeWithMetadataObject.GenomeMap
// Map Structure: Locus Identifier -> PersonGenomePolygenicDiseaseLocusInfo
genomeLociInfoMap := make(map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo)
minimumPossibleRiskWeightSum := 0
maximumPossibleRiskWeightSum := 0
summedDiseaseRiskWeight := 0
for _, locusObject := range diseaseLociList{
locusIdentifierHex := locusObject.LocusIdentifier
locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex)
if (err != nil) { return emptyDiseaseInfoObject, err }
locusRSID := locusObject.LocusRSID
locusRiskWeightsMap := locusObject.RiskWeightsMap
locusOddsRatiosMap := locusObject.OddsRatiosMap
locusMinimumWeight := locusObject.MinimumRiskWeight
locusMaximumWeight := locusObject.MaximumRiskWeight
basePairValueFound, locusBase1Value, locusBase2Value, _, err := getGenomeLocusBasePair(genomeMap, locusRSID)
if (err != nil) { return emptyDiseaseInfoObject, err }
if (basePairValueFound == false){
continue
}
//Outputs:
// -int: Genome disease locus risk weight
// -bool: Genome disease locus odds ratio known
// -float64: Genome disease locus odds ratio
// -error
getGenomeDiseaseLocusRiskInfo := func()(int, bool, float64, error){
locusBasePairJoined := locusBase1Value + ";" + locusBase2Value
riskWeight, exists := locusRiskWeightsMap[locusBasePairJoined]
if (exists == false){
// This is an unknown base combination
// We will treat it as a 0 risk weight
return 0, true, 1, nil
}
if (riskWeight == 0){
return 0, true, 1, nil
}
oddsRatio, exists := locusOddsRatiosMap[locusBasePairJoined]
if (exists == false){
return riskWeight, false, 0, nil
}
return riskWeight, true, oddsRatio, nil
}
locusRiskWeight, locusOddsRatioIsKnown, locusOddsRatio, err := getGenomeDiseaseLocusRiskInfo()
if (err != nil) { return emptyDiseaseInfoObject, err }
newLocusInfoObject := geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo{
LocusBase1: locusBase1Value,
LocusBase2: locusBase2Value,
RiskWeight: locusRiskWeight,
OddsRatioIsKnown: locusOddsRatioIsKnown,
}
if (locusOddsRatioIsKnown == true){
newLocusInfoObject.OddsRatio = locusOddsRatio
}
genomeLociInfoMap[locusIdentifier] = newLocusInfoObject
minimumPossibleRiskWeightSum += locusMinimumWeight
maximumPossibleRiskWeightSum += locusMaximumWeight
summedDiseaseRiskWeight += locusRiskWeight
}
numberOfLociTested := len(genomeLociInfoMap)
if (numberOfLociTested == 0){
// We have no information about this disease for this genome
continue
}
diseaseRiskScore, err := helpers.ScaleNumberProportionally(true, summedDiseaseRiskWeight, minimumPossibleRiskWeightSum, maximumPossibleRiskWeightSum, 0, 10)
if (err != nil) { return emptyDiseaseInfoObject, err }
newDiseaseInfoObject := geneticAnalysis.PersonGenomePolygenicDiseaseInfo{
NumberOfLociTested: numberOfLociTested,
RiskScore: diseaseRiskScore,
LociInfoMap: genomeLociInfoMap,
}
personPolygenicDiseaseInfoMap[genomeIdentifier] = newDiseaseInfoObject
}
newPersonPolygenicDiseaseInfoObject := geneticAnalysis.PersonPolygenicDiseaseInfo{
PolygenicDiseaseInfoMap: personPolygenicDiseaseInfoMap,
}
if (len(personPolygenicDiseaseInfoMap) <= 1){
// We do not need to check for conflicts, there is only <=1 genome with disease information
// Nothing left to do. Analysis is complete.
return newPersonPolygenicDiseaseInfoObject, nil
}
// We check for conflicts between the different genome's results
getConflictExistsBool := func()(bool, error){
// First we check to see if any of the genomes have different risk scores or NumberOfLociTested
genomeRiskScore := 0
genomeNumberOfLociTested := 0
firstItemReached := false
for _, personGenomeDiseaseInfoObject := range personPolygenicDiseaseInfoMap{
currentGenomeRiskScore := personGenomeDiseaseInfoObject.RiskScore
currentGenomeNumberOfLociTested := personGenomeDiseaseInfoObject.NumberOfLociTested
if (firstItemReached == false){
genomeRiskScore = currentGenomeRiskScore
genomeNumberOfLociTested = currentGenomeNumberOfLociTested
firstItemReached = true
continue
}
if (genomeRiskScore != currentGenomeRiskScore){
return true, nil
}
if (genomeNumberOfLociTested != currentGenomeNumberOfLociTested){
return true, nil
}
}
// Now we check for conflicts between the different locus values
// We consider a conflict any time the same locus has different weights/odds ratios
// We don't care if the loci have different base pair values, so long as those base pairs have the same risk weights/odds ratios
for _, locusObject := range diseaseLociList{
locusIdentifierHex := locusObject.LocusIdentifier
locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex)
if (err != nil) { return false, err }
locusRiskWeight := 0
locusOddsRatio := float64(0)
firstItemReached := false
for _, personGenomeDiseaseInfoObject := range personPolygenicDiseaseInfoMap{
genomeLociInfoMap := personGenomeDiseaseInfoObject.LociInfoMap
genomeLocusObject, exists := genomeLociInfoMap[locusIdentifier]
if (exists == false){
if (firstItemReached == true){
// A previous genome has information for this locus, and the current one does not
return true, nil
}
continue
}
genomeLocusRiskWeight := genomeLocusObject.RiskWeight
genomeLocusOddsRatio := genomeLocusObject.OddsRatio
if (firstItemReached == false){
locusRiskWeight = genomeLocusRiskWeight
locusOddsRatio = genomeLocusOddsRatio
firstItemReached = true
continue
}
if (locusRiskWeight == genomeLocusRiskWeight && locusOddsRatio == genomeLocusOddsRatio){
// No conflict exists for this locus on the genomes we have already checked
continue
}
// Conflict exists
return true, nil
}
}
return false, nil
}
conflictExists, err := getConflictExistsBool()
if (err != nil) { return emptyDiseaseInfoObject, err }
newPersonPolygenicDiseaseInfoObject.ConflictExists = conflictExists
return newPersonPolygenicDiseaseInfoObject, nil
}
//Outputs:
// -geneticAnalysis.PersonTraitInfo: Trait analysis object
// -error
func getPersonTraitAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, traitObject traits.Trait)(geneticAnalysis.PersonTraitInfo, error){
// We use this when returning errors
emptyPersonTraitInfo := geneticAnalysis.PersonTraitInfo{}
traitLociList := traitObject.LociList
traitRulesList := traitObject.RulesList
// Map Structure: Genome Identifier -> PersonGenomeTraitInfo
newPersonTraitInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomeTraitInfo)
for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{
genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier
genomeMap := genomeWithMetadataObject.GenomeMap
// This map contains the locus values for the genome
// If an locus's entry doesn't exist, its value is unknown
// Map Structure: Locus rsID -> Locus Value
genomeLocusValuesMap := make(map[int64]locusValue.LocusValue)
for _, locusRSID := range traitLociList{
locusBasePairKnown, locusBase1, locusBase2, locusIsPhased, err := getGenomeLocusBasePair(genomeMap, locusRSID)
if (err != nil) { return emptyPersonTraitInfo, err }
if (locusBasePairKnown == false){
continue
}
newLocusValue := locusValue.LocusValue{
LocusIsPhased: locusIsPhased,
Base1Value: locusBase1,
Base2Value: locusBase2,
}
genomeLocusValuesMap[locusRSID] = newLocusValue
}
// This map contains the trait outcome scores for the genome
// Map Structure: Outcome Name -> Score
// Example: "Intolerant" -> 5
traitOutcomeScoresMap := make(map[string]int)
// Map Structure: Rule Identifier -> Genome Passes rule (true if the genome passes the rule)
personPassesRulesMap := make(map[[3]byte]bool)
if (len(traitRulesList) != 0){
// At least 1 rule exists for this trait
for _, ruleObject := range traitRulesList{
ruleIdentifierHex := ruleObject.RuleIdentifier
ruleIdentifier, err := encoding.DecodeHexStringTo3ByteArray(ruleIdentifierHex)
if (err != nil) { return emptyPersonTraitInfo, err }
ruleLociList := ruleObject.LociList
// Outputs:
// -bool: Genome passes rule is known
// -bool: Genome passes rule
// -error
getGenomePassesRuleBool := func()(bool, bool, error){
// We check to see if genome passes all rule loci
// We consider a rule Known if the genome either passes all loci, or fails to pass 1 locus
// We consider a rule Unknown if any loci are unknown, and all loci which are known pass the rule
anyLocusIsUnknown := false
for _, locusObject := range ruleLociList{
locusRSID := locusObject.LocusRSID
locusBasePairKnown, locusBase1, locusBase2, _, err := getGenomeLocusBasePair(genomeMap, locusRSID)
if (err != nil) { return false, false, err }
if (locusBasePairKnown == false){
anyLocusIsUnknown = true
continue
}
locusBasePairJoined := locusBase1 + ";" + locusBase2
locusBasePairsList := locusObject.BasePairsList
genomePassesRuleLocus := slices.Contains(locusBasePairsList, locusBasePairJoined)
if (genomePassesRuleLocus == false){
// The genome has failed to pass a single rule locus, thus, the rule is not passed
return true, false, nil
}
}
if (anyLocusIsUnknown == true){
// The rule is not passed, but it's status is unknown
// There were no rules which were known not to pass
return false, false, nil
}
// All rules were passed
return true, true, nil
}
genomePassesRuleIsKnown, genomePassesRule, err := getGenomePassesRuleBool()
if (err != nil) { return emptyPersonTraitInfo, err }
if (genomePassesRuleIsKnown == false){
continue
}
personPassesRulesMap[ruleIdentifier] = genomePassesRule
// The rule has been passed by this genome
// We add the outcome points for the rule to the traitOutcomeScoresMap
ruleOutcomePointsMap := ruleObject.OutcomePointsMap
for traitOutcome, pointsChange := range ruleOutcomePointsMap{
traitOutcomeScoresMap[traitOutcome] += pointsChange
}
}
}
traitOutcomesList := traitObject.OutcomesList
// We add all outcomes for which there were no points
for _, traitOutcome := range traitOutcomesList{
_, exists := traitOutcomeScoresMap[traitOutcome]
if (exists == false){
traitOutcomeScoresMap[traitOutcome] = 0
}
}
numberOfRulesTested := len(personPassesRulesMap)
newPersonGenomeTraitInfo := geneticAnalysis.PersonGenomeTraitInfo{
NumberOfRulesTested: numberOfRulesTested,
LocusValuesMap: genomeLocusValuesMap,
OutcomeScoresMap: traitOutcomeScoresMap,
GenomePassesRulesMap: personPassesRulesMap,
}
newPersonTraitInfoMap[genomeIdentifier] = newPersonGenomeTraitInfo
}
newPersonTraitInfoObject := geneticAnalysis.PersonTraitInfo{
TraitInfoMap: newPersonTraitInfoMap,
}
if (len(newPersonTraitInfoMap) <= 1){
// We do not need to check for conflicts, there is only <=1 genome with trait information
// Nothing left to do. Analysis is complete.
return newPersonTraitInfoObject, nil
}
// We check for conflicts
getConflictExistsBool := func()(bool, error){
//TODO: Check for locus value conflicts once locus values are used in neural network prediction.
if (len(traitRulesList) == 0){
return false, nil
}
// We check to see if the outcome scores are the same for all genomes
// We also check each rule result
firstItemReached := false
outcomeScoresMap := make(map[string]int)
passesRulesMap := make(map[[3]byte]bool)
for _, genomeTraitInfoObject := range newPersonTraitInfoMap{
currentGenomeOutcomeScoresMap := genomeTraitInfoObject.OutcomeScoresMap
currentGenomePassesRulesMap := genomeTraitInfoObject.GenomePassesRulesMap
if (firstItemReached == false){
outcomeScoresMap = currentGenomeOutcomeScoresMap
passesRulesMap = currentGenomePassesRulesMap
firstItemReached = true
continue
}
areEqual := maps.Equal(currentGenomeOutcomeScoresMap, outcomeScoresMap)
if (areEqual == false){
// A conflict exists
return true, nil
}
areEqual = maps.Equal(currentGenomePassesRulesMap, passesRulesMap)
if (areEqual == false){
// A conflict exists
return true, nil
}
}
return false, nil
}
conflictExists, err := getConflictExistsBool()
if (err != nil) { return emptyPersonTraitInfo, err }
newPersonTraitInfoObject.ConflictExists = conflictExists
return newPersonTraitInfoObject, nil
}