Improved the genetic analysis creation process in various ways. Recombination breakpoints are more accurately predicted now.

This commit is contained in:
Simon Sarasova 2024-06-05 04:10:35 +00:00
parent 497f596b3b
commit ee976f49b3
No known key found for this signature in database
GPG key ID: EEDA4103C9C36944
5 changed files with 874 additions and 587 deletions

View file

@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log.
## Unversioned Changes ## Unversioned Changes
* Improved the genetic analysis creation process in various ways. Recombination breakpoints are more accurately predicted now. - *Simon Sarasova*
* Improved the identity hash generation tool. The fastest quantity of goroutines is now identified and used. - *Simon Sarasova* * Improved the identity hash generation tool. The fastest quantity of goroutines is now identified and used. - *Simon Sarasova*
* Improved the creation procedures, encoding format, and graphical presentation of genetic analyses. Map lists have been replaced by custom objects. - *Simon Sarasova* * Improved the creation procedures, encoding format, and graphical presentation of genetic analyses. Map lists have been replaced by custom objects. - *Simon Sarasova*
* Upgraded Circl to version 1.3.8. - *Simon Sarasova* * Upgraded Circl to version 1.3.8. - *Simon Sarasova*

View file

@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th
Name | Date Of First Commit | Number Of Commits Name | Date Of First Commit | Number Of Commits
--- | --- | --- --- | --- | ---
Simon Sarasova | June 13, 2023 | 249 Simon Sarasova | June 13, 2023 | 250

View file

@ -20,6 +20,7 @@ import "seekia/internal/appMemory"
import "seekia/internal/encoding" import "seekia/internal/encoding"
import "seekia/internal/genetics/companyAnalysis" import "seekia/internal/genetics/companyAnalysis"
import "seekia/internal/genetics/createGeneticAnalysis" import "seekia/internal/genetics/createGeneticAnalysis"
import "seekia/internal/genetics/locusValue"
import "seekia/internal/genetics/myChosenAnalysis" import "seekia/internal/genetics/myChosenAnalysis"
import "seekia/internal/genetics/myPeople" import "seekia/internal/genetics/myPeople"
import "seekia/internal/genetics/readGeneticAnalysis" import "seekia/internal/genetics/readGeneticAnalysis"
@ -3349,7 +3350,7 @@ func setViewMateProfilePage_PolygenicDiseaseLoci(window fyne.Window, diseaseName
} }
userLocusBase1, userLocusBase2, semicolonExists := strings.Cut(userLocusBasePair, ";") userLocusBase1, userLocusBase2, semicolonExists := strings.Cut(userLocusBasePair, ";")
if (semicolonExists == true){ if (semicolonExists == false){
return false, 0, false, "", errors.New("Database corrupt: Contains profile with invalid " + locusName + " value: " + userLocusBasePair) return false, 0, false, "", errors.New("Database corrupt: Contains profile with invalid " + locusName + " value: " + userLocusBasePair)
} }
@ -3659,94 +3660,46 @@ func setViewMateProfilePage_GeneticTraits(window fyne.Window, userOrOffspring st
myTraitLocusValuesMap, _, _, _, _, err := readGeneticAnalysis.GetPersonTraitInfoFromGeneticAnalysis(myAnalysisObject, traitName, myGenomeIdentifier) myTraitLocusValuesMap, _, _, _, _, err := readGeneticAnalysis.GetPersonTraitInfoFromGeneticAnalysis(myAnalysisObject, traitName, myGenomeIdentifier)
if (err != nil) { return false, nil, 0, err } if (err != nil) { return false, nil, 0, err }
offspringTraitOutcomeScoresMap := make(map[string]float64) // We construct the user's trait locus values map
offspringNumberOfRulesTested := 0 // Map Structure: Locus rsID -> locusValue.LocusValue
userTraitLocusValuesMap := make(map[int64]locusValue.LocusValue)
for _, traitRuleObject := range traitRulesList{ traitLociList := traitObject.LociList
ruleLociList := traitRuleObject.LociList for _, rsID := range traitLociList{
//Outputs: rsIDString := helpers.ConvertInt64ToString(rsID)
// -bool: Probability is known
// -float64: Offspring probability of passing rule
// -error
getOffspringProbabilityOfPassingRule := func()(bool, float64, error){
offspringProbabilityOfPassingRule := float64(1) userLocusValueAttributeName := "LocusValue_rs" + rsIDString
for _, ruleLocusObject := range ruleLociList{
locusRSID := ruleLocusObject.LocusRSID
locusRSIDString := helpers.ConvertInt64ToString(locusRSID)
userLocusValueAttributeName := "LocusValue_rs" + locusRSIDString
userLocusBasePairIsKnown, _, userLocusBasePair, err := getAnyUserProfileAttributeFunction(userLocusValueAttributeName) userLocusBasePairIsKnown, _, userLocusBasePair, err := getAnyUserProfileAttributeFunction(userLocusValueAttributeName)
if (err != nil) { return false, 0, err }
if (userLocusBasePairIsKnown == false){
// We must know all rule loci base pairs to determine offspring probability of passing rule
return false, 0, nil
}
userLocusBase1, userLocusBase2, semicolonExists := strings.Cut(userLocusBasePair, ";")
if (semicolonExists == false){
return false, 0, errors.New("Database corrupt: Contains profile with invalid " + userLocusValueAttributeName + ": " + userLocusBasePair)
}
myLocusValue, myLocusValueIsKnown := myTraitLocusValuesMap[locusRSID]
if (myLocusValueIsKnown == false){
// We must know all rule loci base pairs to determine offspring probability of passing rule
return false, 0, nil
}
myLocusBase1 := myLocusValue.Base1Value
myLocusBase2 := myLocusValue.Base2Value
locusRequiredBasePairsList := ruleLocusObject.BasePairsList
offspringProbabilityOfPassingRuleLocus, err := createGeneticAnalysis.GetOffspringTraitRuleLocusInfo(locusRequiredBasePairsList, userLocusBase1, userLocusBase2, myLocusBase1, myLocusBase2)
if (err != nil) { return false, 0, err }
offspringProbabilityOfPassingRule *= offspringProbabilityOfPassingRuleLocus
}
return true, offspringProbabilityOfPassingRule, nil
}
offspringProbabilityOfPassingRuleKnown, offspringProbabilityOfPassingRule, err := getOffspringProbabilityOfPassingRule()
if (err != nil) { return false, nil, 0, err } if (err != nil) { return false, nil, 0, err }
if (offspringProbabilityOfPassingRuleKnown == false){ if (userLocusBasePairIsKnown == false){
continue continue
} }
offspringNumberOfRulesTested += 1
ruleOutcomePointsMap := traitRuleObject.OutcomePointsMap userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusBasePair, ";")
if (semicolonFound == false){
for traitOutcome, pointsEffect := range ruleOutcomePointsMap{ return false, nil, 0, errors.New("Database corrupt: Contains profile with invalid " + userLocusValueAttributeName + " value: " + userLocusBasePair)
pointsToAdd := float64(pointsEffect) * offspringProbabilityOfPassingRule
offspringTraitOutcomeScoresMap[traitOutcome] += pointsToAdd
}
} }
if (offspringNumberOfRulesTested == 0){ userLocusValue := locusValue.LocusValue{
Base1Value: userLocusBase1,
Base2Value: userLocusBase2,
//TODO: Share LocusIsPhased information in user profiles and retrieve it into this value
LocusIsPhased: false,
}
userTraitLocusValuesMap[rsID] = userLocusValue
}
anyRuleTested, offspringNumberOfRulesTested, _, offspringAverageOutcomeScoresMap, err := createGeneticAnalysis.GetOffspringTraitInfo(traitObject, myTraitLocusValuesMap, userTraitLocusValuesMap)
if (err != nil) { return false, nil, 0, err }
if (anyRuleTested == false){
return false, nil, 0, nil return false, nil, 0, nil
} }
traitOutcomesList := traitObject.OutcomesList return true, offspringAverageOutcomeScoresMap, offspringNumberOfRulesTested, nil
// We add all outcomes for which there were no points
for _, traitOutcome := range traitOutcomesList{
_, exists := offspringTraitOutcomeScoresMap[traitOutcome]
if (exists == false){
offspringTraitOutcomeScoresMap[traitOutcome] = 0
}
}
return true, offspringTraitOutcomeScoresMap, offspringNumberOfRulesTested, nil
} }
if (userOrOffspring == "User"){ if (userOrOffspring == "User"){
@ -3903,11 +3856,103 @@ func setViewMateProfilePage_TraitRules(window fyne.Window, traitName string, use
traitNameRow := container.NewHBox(layout.NewSpacer(), traitNameLabel, traitNameText, viewTraitInfoButton, layout.NewSpacer()) traitNameRow := container.NewHBox(layout.NewSpacer(), traitNameLabel, traitNameText, viewTraitInfoButton, layout.NewSpacer())
// Retrieves my own locus values for this trait from my chosen mate genetic analysis.
//Outputs:
// -bool: At least one of my trait locus values is known
// -map[int64]locusValue.LocusValue: My locus values map
// -error
getMyTraitLocusValuesMap := func()(bool, map[int64]locusValue.LocusValue, error){

	personIsChosen, genomesExist, analysisIsReady, analysisObject, genomeIdentifier, _, err := myChosenAnalysis.GetMyChosenMateGeneticAnalysis()
	if (err != nil) {
		return false, nil, err
	}

	switch {
	case personIsChosen == false, genomesExist == false, analysisIsReady == false:
		// Without a chosen person whose genomes exist and whose analysis is ready,
		// all offspring rule probabilities are unknown
		return false, nil, nil
	}

	locusValuesMap, _, _, _, _, err := readGeneticAnalysis.GetPersonTraitInfoFromGeneticAnalysis(analysisObject, traitName, genomeIdentifier)
	if (err != nil) {
		return false, nil, err
	}

	// An empty map is reported as "nothing known" rather than returned to the caller
	anyLocusValueExists := len(locusValuesMap) != 0
	if (anyLocusValueExists == false){
		return false, nil, nil
	}

	return true, locusValuesMap, nil
}
anyMyTraitLocusValuesExist, myTraitLocusValuesMap, err := getMyTraitLocusValuesMap()
if (err != nil){
setErrorEncounteredPage(window, err, previousPage)
return
}
traitObject, err := traits.GetTraitObject(traitName)
if (err != nil){
setErrorEncounteredPage(window, err, previousPage)
return
}
traitLociList := traitObject.LociList
traitRulesList := traitObject.RulesList
// Builds the viewed user's locus values for this trait from their profile attributes.
//Outputs:
// -bool: At least one trait locus value is known for this user
// -map[int64]locusValue.LocusValue: User locus values map
// -error
getUserTraitLocusValuesMap := func()(bool, map[int64]locusValue.LocusValue, error){

	// Map Structure: Locus rsID -> locusValue.LocusValue
	locusValuesMap := make(map[int64]locusValue.LocusValue)

	for _, locusRSID := range traitLociList{

		// Each locus is stored in the profile under an attribute named "LocusValue_rs" + rsID
		attributeName := "LocusValue_rs" + helpers.ConvertInt64ToString(locusRSID)

		basePairIsKnown, _, basePair, err := getAnyUserProfileAttributeFunction(attributeName)
		if (err != nil) {
			return false, nil, err
		}
		if (basePairIsKnown == false){
			// The profile does not share this locus. We skip it.
			continue
		}

		// The attribute value is expected to be two bases separated by a semicolon
		base1, base2, separatorFound := strings.Cut(basePair, ";")
		if (separatorFound == false){
			return false, nil, errors.New("Database corrupt: Contains profile with invalid " + attributeName + " value: " + basePair)
		}

		locusValuesMap[locusRSID] = locusValue.LocusValue{
			Base1Value: base1,
			Base2Value: base2,
			//TODO: Share LocusIsPhased information in user profiles and retrieve it into this value
			LocusIsPhased: false,
		}
	}

	// An empty map is reported as "nothing known" rather than returned to the caller
	anyLocusValueExists := len(locusValuesMap) != 0
	if (anyLocusValueExists == false){
		return false, nil, nil
	}

	return true, locusValuesMap, nil
}
anyUserTraitLocusValueExists, userTraitLocusValuesMap, err := getUserTraitLocusValuesMap()
if (err != nil){
setErrorEncounteredPage(window, err, previousPage)
return
}
//Outputs: //Outputs:
// -bool: Status is known // -bool: Status is known
// -bool: User passes rule // -bool: User passes rule
// -error // -error
getUserPassesRuleBool := func(ruleIdentifier string, ruleLociList []traits.RuleLocus)(bool, bool, error){ getUserPassesRuleBool := func(ruleLociList []traits.RuleLocus)(bool, bool, error){
if (anyUserTraitLocusValueExists == false){
return false, false, nil
}
allRuleLociKnown := true allRuleLociKnown := true
@ -3915,19 +3960,19 @@ func setViewMateProfilePage_TraitRules(window fyne.Window, traitName string, use
locusRSID := ruleLocusObject.LocusRSID locusRSID := ruleLocusObject.LocusRSID
locusRSIDString := helpers.ConvertInt64ToString(locusRSID) userLocusValue, userLocusValueIsKnown := userTraitLocusValuesMap[locusRSID]
if (userLocusValueIsKnown == false){
userLocusValueAttributeName := "LocusValue_rs" + locusRSIDString
userLocusBasePairIsKnown, _, userLocusBasePair, err := getAnyUserProfileAttributeFunction(userLocusValueAttributeName)
if (err != nil) { return false, false, err }
if (userLocusBasePairIsKnown == false){
// We know rule is not passed // We know rule is not passed
// We keep searching to see if ruleIsPassed status is No or Unknown // We keep searching to see if ruleIsPassed status is No or Unknown
allRuleLociKnown = false allRuleLociKnown = false
continue continue
} }
userLocusBase1Value := userLocusValue.Base1Value
userLocusBase2Value := userLocusValue.Base2Value
userLocusBasePair := userLocusBase1Value + ";" + userLocusBase2Value
ruleLocusBasePairsList := ruleLocusObject.BasePairsList ruleLocusBasePairsList := ruleLocusObject.BasePairsList
userPassesRuleLocus := slices.Contains(ruleLocusBasePairsList, userLocusBasePair) userPassesRuleLocus := slices.Contains(ruleLocusBasePairsList, userLocusBasePair)
@ -3943,104 +3988,55 @@ func setViewMateProfilePage_TraitRules(window fyne.Window, traitName string, use
return true, true, nil return true, true, nil
} }
myPersonChosen, myGenomesExist, myAnalysisIsReady, myAnalysisObject, myGenomeIdentifier, _, err := myChosenAnalysis.GetMyChosenMateGeneticAnalysis() //Outputs:
// -bool: Any offspring probability of passing rule is known
// -map[[3]byte]int: Offspring probability of passing rules map
// Map Structure: Rule Identifier -> Probability offspring will pass rule (0-100%)
// -error
getOffspringProbabilityOfPassingRulesMap := func()(bool, map[[3]byte]int, error){

	// Locus values from both me and the user are required.
	// If either side has none, no offspring probability can be determined.
	bothLocusValueMapsExist := anyMyTraitLocusValuesExist && anyUserTraitLocusValueExists
	if (bothLocusValueMapsExist == false){
		return false, nil, nil
	}

	// Only the any-rule-tested flag and the per-rule probability map are used here
	anyRuleTested, _, probabilityOfPassingRulesMap, _, err := createGeneticAnalysis.GetOffspringTraitInfo(traitObject, myTraitLocusValuesMap, userTraitLocusValuesMap)

	switch {
	case err != nil:
		return false, nil, err
	case anyRuleTested == false:
		return false, nil, nil
	}

	return true, probabilityOfPassingRulesMap, nil
}
anyOffspringProbabilityOfPassingRuleIsKnown, offspringProbabilityOfPassingRulesMap, err := getOffspringProbabilityOfPassingRulesMap()
if (err != nil) { if (err != nil) {
setErrorEncounteredPage(window, err, previousPage) setErrorEncounteredPage(window, err, previousPage)
return return
} }
//Outputs:
// -bool: Probability is known
// -int: Probability of passing rule (0-100)
// -error
getOffspringProbabilityOfPassingRule := func(ruleIdentifier string, ruleLociList []traits.RuleLocus)(bool, int, error){
if (myPersonChosen == false || myGenomesExist == false || myAnalysisIsReady == false){
// Without my genome person chosen, all offspring rule probabilities are unknown
return false, 0, nil
}
myTraitLocusValuesMap, _, _, _, _, err := readGeneticAnalysis.GetPersonTraitInfoFromGeneticAnalysis(myAnalysisObject, traitName, myGenomeIdentifier)
if (err != nil) { return false, 0, err }
offspringProbabilityOfPassingRule := float64(1)
for _, ruleLocusObject := range ruleLociList{
locusRSID := ruleLocusObject.LocusRSID
locusRSIDString := helpers.ConvertInt64ToString(locusRSID)
userLocusValueAttributeName := "LocusValue_rs" + locusRSIDString
userLocusBasePairIsKnown, _, userLocusBasePair, err := getAnyUserProfileAttributeFunction(userLocusValueAttributeName)
if (err != nil) { return false, 0, err }
if (userLocusBasePairIsKnown == false){
// We must know all rule loci base pairs to determine offspring probability of passing rule
return false, 0, nil
}
userLocusBase1, userLocusBase2, semicolonExists := strings.Cut(userLocusBasePair, ";")
if (semicolonExists == false){
return false, 0, errors.New("Database corrupt: Contains profile with invalid: " + userLocusValueAttributeName + ": " + userLocusBasePair)
}
myLocusValue, myLocusValueIsKnown := myTraitLocusValuesMap[locusRSID]
if (myLocusValueIsKnown == false){
// We must know all rule loci base pairs to determine offspring probability of passing rule
return false, 0, nil
}
myLocusBase1 := myLocusValue.Base1Value
myLocusBase2 := myLocusValue.Base2Value
locusRequiredBasePairsList := ruleLocusObject.BasePairsList
offspringProbabilityOfPassingRuleLocus, err := createGeneticAnalysis.GetOffspringTraitRuleLocusInfo(locusRequiredBasePairsList, userLocusBase1, userLocusBase2, myLocusBase1, myLocusBase2)
if (err != nil) { return false, 0, err }
offspringProbabilityOfPassingRule *= offspringProbabilityOfPassingRuleLocus
}
offspringPercentageProbabilityOfPassingRule := int(offspringProbabilityOfPassingRule * 100)
return true, offspringPercentageProbabilityOfPassingRule, nil
}
traitObject, err := traits.GetTraitObject(traitName)
if (err != nil){
setErrorEncounteredPage(window, err, previousPage)
return
}
traitRulesList := traitObject.RulesList
totalNumberOfTraitRules := len(traitRulesList)
getNumberOfRulesTested := func()(int, error){ getNumberOfRulesTested := func()(int, error){
if (userOrOffspring == "Offspring"){
if (anyOffspringProbabilityOfPassingRuleIsKnown == false){
return 0, nil
}
numberOfRulesTested := len(offspringProbabilityOfPassingRulesMap)
return numberOfRulesTested, nil
}
numberOfRulesTested := 0 numberOfRulesTested := 0
for _, ruleObject := range traitRulesList{ for _, ruleObject := range traitRulesList{
ruleIdentifier := ruleObject.RuleIdentifier
ruleLociList := ruleObject.LociList ruleLociList := ruleObject.LociList
if (userOrOffspring == "User"){ ruleStatusIsKnown, _, err := getUserPassesRuleBool(ruleLociList)
ruleStatusIsKnown, _, err := getUserPassesRuleBool(ruleIdentifier, ruleLociList)
if (err != nil) { return 0, err } if (err != nil) { return 0, err }
if (ruleStatusIsKnown == true){ if (ruleStatusIsKnown == true){
numberOfRulesTested += 1 numberOfRulesTested += 1
} }
} else if (userOrOffspring == "Offspring"){
ruleProbabilityIsKnown, _, err := getOffspringProbabilityOfPassingRule(ruleIdentifier, ruleLociList)
if (err != nil) { return 0, err }
if (ruleProbabilityIsKnown == true){
numberOfRulesTested += 1
}
}
} }
return numberOfRulesTested, nil return numberOfRulesTested, nil
@ -4054,6 +4050,7 @@ func setViewMateProfilePage_TraitRules(window fyne.Window, traitName string, use
rulesTestedLabel := widget.NewLabel("Rules Tested:") rulesTestedLabel := widget.NewLabel("Rules Tested:")
totalNumberOfTraitRules := len(traitRulesList)
numberOfRulesTestedString := helpers.ConvertIntToString(numberOfRulesTested) numberOfRulesTestedString := helpers.ConvertIntToString(numberOfRulesTested)
totalNumberOfTraitRulesString := helpers.ConvertIntToString(totalNumberOfTraitRules) totalNumberOfTraitRulesString := helpers.ConvertIntToString(totalNumberOfTraitRules)
@ -4119,12 +4116,16 @@ func setViewMateProfilePage_TraitRules(window fyne.Window, traitName string, use
for _, ruleObject := range traitRulesList{ for _, ruleObject := range traitRulesList{
ruleIdentifier := ruleObject.RuleIdentifier ruleIdentifierHex := ruleObject.RuleIdentifier
ruleIdentifier, err := encoding.DecodeHexStringTo3ByteArray(ruleIdentifierHex)
if (err != nil) { return nil, err }
ruleLociList := ruleObject.LociList ruleLociList := ruleObject.LociList
getUserPassesRuleString := func()(string, error){ getUserPassesRuleString := func()(string, error){
userRuleStatusIsKnown, userPassesRule, err := getUserPassesRuleBool(ruleIdentifier, ruleLociList) userRuleStatusIsKnown, userPassesRule, err := getUserPassesRuleBool(ruleLociList)
if (err != nil) { return "", err } if (err != nil) { return "", err }
if (userRuleStatusIsKnown == false){ if (userRuleStatusIsKnown == false){
@ -4141,12 +4142,17 @@ func setViewMateProfilePage_TraitRules(window fyne.Window, traitName string, use
getOffspringProbabilityOfPassingRuleString := func()(string, error){ getOffspringProbabilityOfPassingRuleString := func()(string, error){
probabilityIsKnown, probabilityOfPassingRule, err := getOffspringProbabilityOfPassingRule(ruleIdentifier, ruleLociList) if (anyOffspringProbabilityOfPassingRuleIsKnown == false){
if (err != nil) { return "", err } result := translate("Unknown")
return result, nil
}
probabilityOfPassingRule, probabilityIsKnown := offspringProbabilityOfPassingRulesMap[ruleIdentifier]
if (probabilityIsKnown == false){ if (probabilityIsKnown == false){
result := translate("Unknown") result := translate("Unknown")
return result, nil return result, nil
} }
ruleProbabilityString := helpers.ConvertIntToString(probabilityOfPassingRule) ruleProbabilityString := helpers.ConvertIntToString(probabilityOfPassingRule)
ruleProbabilityFormatted := ruleProbabilityString + "%" ruleProbabilityFormatted := ruleProbabilityString + "%"
@ -4160,9 +4166,9 @@ func setViewMateProfilePage_TraitRules(window fyne.Window, traitName string, use
// We do this because the rule effects column may be multiple rows tall // We do this because the rule effects column may be multiple rows tall
viewRuleInfoButton := widget.NewButtonWithIcon("", theme.InfoIcon(), func(){ viewRuleInfoButton := widget.NewButtonWithIcon("", theme.InfoIcon(), func(){
setViewTraitRuleDetailsPage(window, traitName, ruleIdentifier, currentPage) setViewTraitRuleDetailsPage(window, traitName, ruleIdentifierHex, currentPage)
}) })
ruleIdentifierLabel := getBoldLabelCentered(ruleIdentifier) ruleIdentifierLabel := getBoldLabelCentered(ruleIdentifierHex)
userPassesRuleLabel := getBoldLabelCentered(userPassesRuleString) userPassesRuleLabel := getBoldLabelCentered(userPassesRuleString)
offspringProbabilityOfPassingRuleLabel := getBoldLabelCentered(offspringProbabilityOfPassingRuleString) offspringProbabilityOfPassingRuleLabel := getBoldLabelCentered(offspringProbabilityOfPassingRuleString)

View file

@ -7,26 +7,7 @@
package createGeneticAnalysis package createGeneticAnalysis
// TODO: Some of the probabilities produced by this package are wrong // Disclaimer: I am a novice in the ways of genetics. This package could be flawed in numerous ways.
// In this package, we are assuming that genetic recombination (the formation of the genetic sequences for the sperm/eggs)
// happens randomly for each allele locus
// In reality, the recombination break points occur less often, and larger portions of each chromosome remain intact.
// This affects the estimates and probabilities for all of the generated analyses
// In particular, the probability of passing a defective gene does not increase as much as this package currently
// estimates that it does, in the case of multiple defects existing in the same monogenic-disease-causing gene.
// Also, based on my research, I believe that recombination break points are less likely to occur within genes, meaning they are more likely to occur at the gene boundaries (codons)
// We need to remedy this problem and fix this package
// Research gene linkage and recombination to understand more.
//
// The phase of a locus is actually relevant and important for determining the person-has-disease status and will-pass-a-variant probability
// Users who have multiple heterozygous single-base mutations on different locations of the same gene may have the disease,
// but we need their genome locations to be phased to be able to know
// Having multiple variants within a gene might not increase the probability of passing a variant,
// assuming all of those variants were on the same chromosome
// Thus, we need phased loci to determine an accurate will-pass-a-variant probability
// We will still be able to determine will-pass-a-variant probabilities for users who only have 1 mutation on 1 base in the entire gene,
// regardless of if their loci phase is known or not. That probability is 50%.
// TODO: We want to eventually use neural nets for both trait and polygenic disease analysis (see geneticPrediction.go) // TODO: We want to eventually use neural nets for both trait and polygenic disease analysis (see geneticPrediction.go)
// These will be trained on a set of genomes and will output a probability analysis for each trait/disease // These will be trained on a set of genomes and will output a probability analysis for each trait/disease
@ -59,7 +40,7 @@ import "seekia/internal/genetics/prepareRawGenomes"
import "seekia/internal/helpers" import "seekia/internal/helpers"
import "errors" import "errors"
import "math" import mathRand "math/rand/v2"
import "strings" import "strings"
import "slices" import "slices"
import "maps" import "maps"
@ -372,17 +353,6 @@ func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenom
variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex) variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex)
if (err != nil) { return err } if (err != nil) { return err }
// Outputs:
// -bool: Probabilities are known
// -int: Lower bound Percentage Probability that offspring will have 0 mutations
// -int: Upper bound Percentage Probability that offspring will have 0 mutations
// -int: Lower bound Percentage Probability that offspring will have 1 mutation
// -int: Upper bound Percentage Probability that offspring will have 1 mutation
// -int: Lower bound Percentage Probability that offspring will have 2 mutations
// -int: Upper bound Percentage Probability that offspring will have 2 mutations
// -error
getOffspringVariantProbabilities := func()(bool, int, int, int, int, int, int, error){
//Outputs: //Outputs:
// -bool: Probability is known // -bool: Probability is known
// -float64: Probability that person will pass variant to offspring (between 0 and 1) // -float64: Probability that person will pass variant to offspring (between 0 and 1)
@ -425,42 +395,13 @@ func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenom
} }
person1VariantProbabilityIsKnown, person1WillPassVariantProbability, err := getPersonWillPassVariantProbability(person1DiseaseAnalysisObject, person1GenomeIdentifier) person1VariantProbabilityIsKnown, person1WillPassVariantProbability, err := getPersonWillPassVariantProbability(person1DiseaseAnalysisObject, person1GenomeIdentifier)
if (err != nil) { return false, 0, 0, 0, 0, 0, 0, err } if (err != nil) { return err }
person2VariantProbabilityIsKnown, person2WillPassVariantProbability, err := getPersonWillPassVariantProbability(person2DiseaseAnalysisObject, person2GenomeIdentifier) person2VariantProbabilityIsKnown, person2WillPassVariantProbability, err := getPersonWillPassVariantProbability(person2DiseaseAnalysisObject, person2GenomeIdentifier)
if (err != nil) { return false, 0, 0, 0, 0, 0, 0, err } if (err != nil) { return err }
if (person1VariantProbabilityIsKnown == false && person2VariantProbabilityIsKnown == false){ if (person1VariantProbabilityIsKnown == false && person2VariantProbabilityIsKnown == false){
return false, 0, 0, 0, 0, 0, 0, nil continue
}
//Outputs:
// -int: Percentage Probability of 0 mutations
// -int: Percentage Probability of 1 mutation
// -int: Percentage Probability of 2 mutations
// -error
getOffspringVariantProbabilities := func(person1WillPassVariantProbability float64, person2WillPassVariantProbability float64)(int, int, int, error){
// This is the probability that neither person will pass the variant
// P = P(!A) * P(!B)
probabilityOf0Mutations := (1 - person1WillPassVariantProbability) * (1 - person2WillPassVariantProbability)
// This is the probability that either person will pass the variant, but not both
// P(A XOR B) = P(A) + P(B) - (2 * P(A and B))
probabilityOf1Mutation := person1WillPassVariantProbability + person2WillPassVariantProbability - (2 * person1WillPassVariantProbability * person2WillPassVariantProbability)
// This is the probability that both people will pass the variant
// P(A and B) = P(A) * P(B)
probabilityOf2Mutations := person1WillPassVariantProbability * person2WillPassVariantProbability
percentageProbabilityOf0Mutations, err := helpers.FloorFloat64ToInt(probabilityOf0Mutations * 100)
if (err != nil) { return 0, 0, 0, err }
percentageProbabilityOf1Mutation, err := helpers.FloorFloat64ToInt(probabilityOf1Mutation * 100)
if (err != nil) { return 0, 0, 0, err }
percentageProbabilityOf2Mutations, err := helpers.FloorFloat64ToInt(probabilityOf2Mutations * 100)
if (err != nil) { return 0, 0, 0, err }
return percentageProbabilityOf0Mutations, percentageProbabilityOf1Mutation, percentageProbabilityOf2Mutations, nil
} }
// Outputs: // Outputs:
@ -484,11 +425,40 @@ func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenom
bestCasePerson1WillPassVariantProbability, worstCasePerson1WillPassVariantProbability, bestCasePerson2WillPassVariantProbability, worstCasePerson2WillPassVariantProbability := getBestAndWorstCaseProbabilities() bestCasePerson1WillPassVariantProbability, worstCasePerson1WillPassVariantProbability, bestCasePerson2WillPassVariantProbability, worstCasePerson2WillPassVariantProbability := getBestAndWorstCaseProbabilities()
bestCase0MutationsProbability, bestCase1MutationProbability, bestCase2MutationsProbability, err := getOffspringVariantProbabilities(bestCasePerson1WillPassVariantProbability, bestCasePerson2WillPassVariantProbability) //Outputs:
if (err != nil) { return false, 0, 0, 0, 0, 0, 0, err } // -int: Percentage Probability of 0 mutations
// -int: Percentage Probability of 1 mutation
// -int: Percentage Probability of 2 mutations
// -error
getOffspringVariantMutationProbabilities := func(person1WillPassVariantProbability float64, person2WillPassVariantProbability float64)(int, int, int, error){
worstCase0MutationsProbability, worstCase1MutationProbability, worstCase2MutationsProbability, err := getOffspringVariantProbabilities(worstCasePerson1WillPassVariantProbability, worstCasePerson2WillPassVariantProbability) // This is the probability that neither person will pass the variant
if (err != nil) { return false, 0, 0, 0, 0, 0, 0, err } // P = P(!A) * P(!B)
probabilityOf0Mutations := (1 - person1WillPassVariantProbability) * (1 - person2WillPassVariantProbability)
// This is the probability that either person will pass the variant, but not both
// P(A XOR B) = P(A) + P(B) - (2 * P(A and B))
probabilityOf1Mutation := person1WillPassVariantProbability + person2WillPassVariantProbability - (2 * person1WillPassVariantProbability * person2WillPassVariantProbability)
// This is the probability that both people will pass the variant
// P(A and B) = P(A) * P(B)
probabilityOf2Mutations := person1WillPassVariantProbability * person2WillPassVariantProbability
percentageProbabilityOf0Mutations, err := helpers.FloorFloat64ToInt(probabilityOf0Mutations * 100)
if (err != nil) { return 0, 0, 0, err }
percentageProbabilityOf1Mutation, err := helpers.FloorFloat64ToInt(probabilityOf1Mutation * 100)
if (err != nil) { return 0, 0, 0, err }
percentageProbabilityOf2Mutations, err := helpers.FloorFloat64ToInt(probabilityOf2Mutations * 100)
if (err != nil) { return 0, 0, 0, err }
return percentageProbabilityOf0Mutations, percentageProbabilityOf1Mutation, percentageProbabilityOf2Mutations, nil
}
bestCase0MutationsProbability, bestCase1MutationProbability, bestCase2MutationsProbability, err := getOffspringVariantMutationProbabilities(bestCasePerson1WillPassVariantProbability, bestCasePerson2WillPassVariantProbability)
if (err != nil) { return err }
worstCase0MutationsProbability, worstCase1MutationProbability, worstCase2MutationsProbability, err := getOffspringVariantMutationProbabilities(worstCasePerson1WillPassVariantProbability, worstCasePerson2WillPassVariantProbability)
if (err != nil) { return err }
// We have to figure out which 1-mutation-probability is lower // We have to figure out which 1-mutation-probability is lower
// The best case probabilities can actually result in a higher probability for 1 mutation // The best case probabilities can actually result in a higher probability for 1 mutation
@ -500,25 +470,16 @@ func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenom
lowerBound1MutationProbability := min(bestCase1MutationProbability, worstCase1MutationProbability) lowerBound1MutationProbability := min(bestCase1MutationProbability, worstCase1MutationProbability)
upperBound1MutationProbability := max(bestCase1MutationProbability, worstCase1MutationProbability) upperBound1MutationProbability := max(bestCase1MutationProbability, worstCase1MutationProbability)
return true, worstCase0MutationsProbability, bestCase0MutationsProbability, lowerBound1MutationProbability, upperBound1MutationProbability, bestCase2MutationsProbability, worstCase2MutationsProbability, nil
}
probabilitiesKnown, probabilityOf0MutationsLowerBound, probabilityOf0MutationsUpperBound, probabilityOf1MutationLowerBound, probabilityOf1MutationUpperBound, probabilityOf2MutationsLowerBound, probabilityOf2MutationsUpperBound, err := getOffspringVariantProbabilities()
if (err != nil) { return err }
if (probabilitiesKnown == false){
continue
}
newOffspringMonogenicDiseaseVariantInfoObject := geneticAnalysis.OffspringMonogenicDiseaseVariantInfo{ newOffspringMonogenicDiseaseVariantInfoObject := geneticAnalysis.OffspringMonogenicDiseaseVariantInfo{
ProbabilityOf0MutationsLowerBound: probabilityOf0MutationsLowerBound, ProbabilityOf0MutationsLowerBound: worstCase0MutationsProbability,
ProbabilityOf0MutationsUpperBound: probabilityOf0MutationsUpperBound, ProbabilityOf0MutationsUpperBound: bestCase0MutationsProbability,
ProbabilityOf1MutationLowerBound: probabilityOf1MutationLowerBound, ProbabilityOf1MutationLowerBound: lowerBound1MutationProbability,
ProbabilityOf1MutationUpperBound: probabilityOf1MutationUpperBound, ProbabilityOf1MutationUpperBound: upperBound1MutationProbability,
ProbabilityOf2MutationsLowerBound: probabilityOf2MutationsLowerBound, ProbabilityOf2MutationsLowerBound: bestCase2MutationsProbability,
ProbabilityOf2MutationsUpperBound: probabilityOf2MutationsUpperBound, ProbabilityOf2MutationsUpperBound: worstCase2MutationsProbability,
} }
offspringVariantsInfoMap[variantIdentifier] = newOffspringMonogenicDiseaseVariantInfoObject offspringVariantsInfoMap[variantIdentifier] = newOffspringMonogenicDiseaseVariantInfoObject
@ -851,7 +812,6 @@ func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenom
for _, traitObject := range traitObjectsList{ for _, traitObject := range traitObjectsList{
traitName := traitObject.TraitName traitName := traitObject.TraitName
traitRulesList := traitObject.RulesList
person1TraitAnalysisObject, err := getPersonTraitAnalysis(person1GenomesWithMetadataList, traitObject) person1TraitAnalysisObject, err := getPersonTraitAnalysis(person1GenomesWithMetadataList, traitObject)
if (err != nil) { return false, "", err } if (err != nil) { return false, "", err }
@ -866,149 +826,33 @@ func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenom
// This will add the offspring trait information for the provided genome pair to the offspringTraitInfoMap // This will add the offspring trait information for the provided genome pair to the offspringTraitInfoMap
addGenomePairTraitInfoToOffspringMap := func(person1GenomeIdentifier [16]byte, person2GenomeIdentifier [16]byte)error{ addGenomePairTraitInfoToOffspringMap := func(person1GenomeIdentifier [16]byte, person2GenomeIdentifier [16]byte)error{
// Map Structure: Outcome Name -> Outcome Score person1TraitInfoMap := person1TraitAnalysisObject.TraitInfoMap
// Example: "Intolerant" -> 2.5 person2TraitInfoMap := person2TraitAnalysisObject.TraitInfoMap
offspringAverageOutcomeScoresMap := make(map[string]float64)
// Map Structure: Rule Identifier -> Offspring Probability Of Passing Rule person1GenomeTraitInfoObject, exists := person1TraitInfoMap[person1GenomeIdentifier]
// The value stores the probability that the offspring will pass the rule
// This is a number between 0-100%
offspringProbabilityOfPassingRulesMap := make(map[[3]byte]int)
// We iterate through rules to determine genome pair trait info
for _, ruleObject := range traitRulesList{
ruleIdentifierHex := ruleObject.RuleIdentifier
ruleIdentifier, err := encoding.DecodeHexStringTo3ByteArray(ruleIdentifierHex)
if (err != nil) { return err }
// This is a list that describes the locus rsids and their values that must be fulfilled to pass the rule
ruleLocusObjectsList := ruleObject.LociList
//Outputs:
// -bool: Any rule loci are known
// -map[int64]locusValue.LocusValue: rsID -> Locus base pair value
// -error
getPersonGenomeTraitLociValuesMap := func(personGenomeIdentifier [16]byte, personTraitAnalysisObject geneticAnalysis.PersonTraitInfo)(bool, map[int64]locusValue.LocusValue, error){
personTraitInfoMap := personTraitAnalysisObject.TraitInfoMap
personGenomeTraitInfoObject, exists := personTraitInfoMap[personGenomeIdentifier]
if (exists == false){ if (exists == false){
// This person has no genome values for any loci for this trait // This person has no genome values for any loci for this trait
return false, nil, nil // No predictions are possible
return nil
} }
person2GenomeTraitInfoObject, exists := person2TraitInfoMap[person2GenomeIdentifier]
personLocusValuesMap := personGenomeTraitInfoObject.LocusValuesMap if (exists == false){
// This person has no genome values for any loci for this trait
return true, personLocusValuesMap, nil // No predictions are possible
}
anyPerson1LociKnown, person1GenomeTraitLociValuesMap, err := getPersonGenomeTraitLociValuesMap(person1GenomeIdentifier, person1TraitAnalysisObject)
if (err != nil) { return err }
if (anyPerson1LociKnown == false){
// We only know how many of the 4 prospective offspring pass the rule if all loci are known for both people's genomes
return nil return nil
} }
anyPerson2LociKnown, person2GenomeTraitLociValuesMap, err := getPersonGenomeTraitLociValuesMap(person2GenomeIdentifier, person2TraitAnalysisObject) person1LocusValuesMap := person1GenomeTraitInfoObject.LocusValuesMap
person2LocusValuesMap := person2GenomeTraitInfoObject.LocusValuesMap
anyRulesTested, numberOfRulesTested, offspringProbabilityOfPassingRulesMap, offspringAverageOutcomeScoresMap, err := GetOffspringTraitInfo(traitObject, person1LocusValuesMap, person2LocusValuesMap)
if (err != nil) { return err } if (err != nil) { return err }
if (anyPerson2LociKnown == false){ if (anyRulesTested == false){
// We only know how many of the 4 prospective offspring pass the rule if all loci are known for both people's genomes
return nil
}
getOffspringProbabilityOfPassingRule := func()(bool, int, error){
// This is a probability between 0 and 1
offspringProbabilityOfPassingRule := float64(1)
for _, ruleLocusObject := range ruleLocusObjectsList{
locusRSID := ruleLocusObject.LocusRSID
person1LocusValue, exists := person1GenomeTraitLociValuesMap[locusRSID]
if (exists == false){
// We don't know the locus value for this rule for this person
// Thus, we cannot calculate the probabilityOfPassingRule for the offspring
return false, 0, nil
}
person2LocusValue, exists := person2GenomeTraitLociValuesMap[locusRSID]
if (exists == false){
// We don't know the locus value for this rule for this person
// Thus, we cannot calculate the probabilityOfPassingRule for the offspring
return false, 0, nil
}
locusRequiredBasePairsList := ruleLocusObject.BasePairsList
person1LocusBase1Value := person1LocusValue.Base1Value
person1LocusBase2Value := person1LocusValue.Base2Value
person2LocusBase1Value := person2LocusValue.Base1Value
person2LocusBase2Value := person2LocusValue.Base2Value
offspringProbabilityOfPassingRuleLocus, err := GetOffspringTraitRuleLocusInfo(locusRequiredBasePairsList, person1LocusBase1Value, person1LocusBase2Value, person2LocusBase1Value, person2LocusBase2Value)
if (err != nil) { return false, 0, err }
offspringProbabilityOfPassingRule *= offspringProbabilityOfPassingRuleLocus
}
offspringPercentageProbabilityOfPassingRule := offspringProbabilityOfPassingRule * 100
probabilityRounded, err := helpers.FloorFloat64ToInt(offspringPercentageProbabilityOfPassingRule)
if (err != nil) { return false, 0, err }
return true, probabilityRounded, nil
}
ruleProbabilityIsKnown, offspringPercentageProbabilityOfPassingRule, err := getOffspringProbabilityOfPassingRule()
if (err != nil) { return err }
if (ruleProbabilityIsKnown == false){
// We continue to the next rule
continue
}
offspringProbabilityOfPassingRulesMap[ruleIdentifier] = offspringPercentageProbabilityOfPassingRule
// This is the 0 - 1 probability value
offspringProbabilityOfPassingRule := float64(offspringPercentageProbabilityOfPassingRule)/100
ruleOutcomePointsMap := ruleObject.OutcomePointsMap
for outcomeName, outcomePointsEffect := range ruleOutcomePointsMap{
pointsToAdd := float64(outcomePointsEffect) * offspringProbabilityOfPassingRule
offspringAverageOutcomeScoresMap[outcomeName] += pointsToAdd
}
}
numberOfRulesTested := len(offspringProbabilityOfPassingRulesMap)
if (numberOfRulesTested == 0){
// No rules were tested for this trait // No rules were tested for this trait
// We will not add anything to the trait info map for this genome pair // We will not add anything to the trait info map for this genome pair
return nil return nil
} }
traitOutcomesList := traitObject.OutcomesList
// We add a 0 outcome for outcomes without any points
for _, outcomeName := range traitOutcomesList{
_, exists := offspringAverageOutcomeScoresMap[outcomeName]
if (exists == false){
// No rules effected this outcome.
offspringAverageOutcomeScoresMap[outcomeName] = 0
}
}
newOffspringGenomePairTraitInfoObject := geneticAnalysis.OffspringGenomePairTraitInfo{ newOffspringGenomePairTraitInfoObject := geneticAnalysis.OffspringGenomePairTraitInfo{
NumberOfRulesTested: numberOfRulesTested, NumberOfRulesTested: numberOfRulesTested,
OffspringAverageOutcomeScoresMap: offspringAverageOutcomeScoresMap, OffspringAverageOutcomeScoresMap: offspringAverageOutcomeScoresMap,
@ -1094,6 +938,275 @@ func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenom
return true, analysisString, nil return true, analysisString, nil
} }
// This function will return a list of 100 prospective offspring genomes
// Each genome represents an equal-probability offspring genome from both people's genomes
// This function takes into account the effects of genetic linkage
// Any locations which do not exist in both people's genomes will not be included
//
// All randomness used by this function comes from a single seeded generator, so
// the same inputs always produce the same list of prospective offspring genomes.
//
//Inputs:
//	-lociList: The rsIDs to include in each prospective offspring genome
//	-person1LociMap: rsID -> Locus value for person 1
//	-person2LociMap: rsID -> Locus value for person 2
//Outputs:
//	-bool: Any locus value exists between both users
//	-[]map[int64]locusValue.LocusValue: The prospective offspring genomes (rsID -> Locus value)
//	-error
func getProspectiveOffspringGenomesList(lociList []int64, person1LociMap map[int64]locusValue.LocusValue, person2LociMap map[int64]locusValue.LocusValue)(bool, []map[int64]locusValue.LocusValue, error){

	// This is the number of prospective offspring genomes we create
	const numberOfProspectiveOffspring = 100

	// -We use randomness to generate the offspring genomes
	// -We want the results to be the same for each pair of people each time, so we have to seed our randomness generator
	// -This is necessary so that two people's analysis results do not change every time
	// -Instead, the same 2 people will produce the exact same result every time

	pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))

	//Outputs:
	//	-[]int64: A list of random breakpoints for this chromosome that are statistically accurate
	//	-error
	getRandomChromosomeBreakpoints := func(chromosome int)([]int64, error){

		getChromosomeLength := func()(int64, error){

			// Approximate number of base pairs in each chromosome taken from: https://www.ncbi.nlm.nih.gov/books/NBK557784/

			switch chromosome{
				case 1:{
					return 249000000, nil
				}
				case 2:{
					return 243000000, nil
				}
				case 3:{
					return 200000000, nil
				}
				case 4:{
					return 192000000, nil
				}
				case 5:{
					return 181000000, nil
				}
				case 6:{
					return 170000000, nil
				}
				case 7:{
					return 158000000, nil
				}
				case 8:{
					return 146000000, nil
				}
				case 9:{
					return 140000000, nil
				}
				case 10:{
					return 135000000, nil
				}
				case 11:{
					return 135000000, nil
				}
				case 12:{
					return 132000000, nil
				}
				case 13:{
					return 114000000, nil
				}
				case 14:{
					return 106000000, nil
				}
				case 15:{
					return 100000000, nil
				}
				case 16:{
					return 89000000, nil
				}
				case 17:{
					return 79000000, nil
				}
				case 18:{
					return 76000000, nil
				}
				case 19:{
					return 64000000, nil
				}
				case 20:{
					return 62000000, nil
				}
				case 21:{
					return 47000000, nil
				}
				case 22:{
					return 50000000, nil
				}
			}

			chromosomeString := helpers.ConvertIntToString(chromosome)

			return 0, errors.New("getRandomChromosomeBreakpoints called with invalid chromosome: " + chromosomeString)
		}

		chromosomeLength, err := getChromosomeLength()
		if (err != nil) { return nil, err }

		listOfRandomBreakpoints := make([]int64, 0)

		// TODO: Take into account different recombination rate for each chromosome
		// TODO: There are also breakpoint hotspots which we need to account for
		// TODO: I read somewhere that recombination break points are less likely to occur within genes,
		//  meaning they are more likely to occur at the gene boundaries (codons)

		// We step by 1,000,000 each time
		// It would be more realistic if we did it in 1 integer increments, but it would be slower

		for position := int64(0); position <= chromosomeLength; position += 1000000{

			//From Wikipedia:
			// A centimorgan (abbreviated cM) is a unit for measuring genetic linkage.
			// It is defined as the distance between chromosome positions (loci) for which the expected
			// average number of intervening chromosomal crossovers in a single generation is 0.01.
			// One centimorgan corresponds to about 1 million base pairs in humans on average
			//
			// A chromosomal crossover == recombination breakpoint
			//
			// For every 1,000,000 base pairs, there is a 0.01 probability that there is a breakpoint

			randomFloat := pseudorandomNumberGenerator.Float64()
			if (randomFloat <= 0.01){
				// This has a 0.01, or 1% probability of being true
				listOfRandomBreakpoints = append(listOfRandomBreakpoints, position)
			}
		}

		return listOfRandomBreakpoints, nil
	}

	// Each list item is a map with the structure: rsID -> Locus Value
	offspringGenomesList := make([]map[int64]locusValue.LocusValue, 0, numberOfProspectiveOffspring)

	for i:=0; i < numberOfProspectiveOffspring; i++{

		// This map stores the chromosome breakpoints for person1
		// Map Structure: Chromosome -> List of breakpoints
		person1ChromosomeBreakpointsMap := make(map[int][]int64)

		// This map stores the chromosome breakpoints for person2
		// Map Structure: Chromosome -> List of breakpoints
		person2ChromosomeBreakpointsMap := make(map[int][]int64)

		// This stores the locus values for this prospective offspring
		// Map Structure: rsID -> Locus Value
		prospectiveOffspringGenome := make(map[int64]locusValue.LocusValue)

		for _, rsID := range lociList{

			//Outputs:
			//	-bool: Allele is known
			//	-string: Locus base
			//	-error
			getPersonAllele := func(personLociMap map[int64]locusValue.LocusValue, personBreakpointsMap map[int][]int64)(bool, string, error){

				personLocusValue, exists := personLociMap[rsID]
				if (exists == false){
					return false, "", nil
				}

				personLocusBase1 := personLocusValue.Base1Value
				// This must read Base2Value. Reading Base1Value here would mean allele 2 is never inherited.
				personLocusBase2 := personLocusValue.Base2Value
				personLocusIsPhased := personLocusValue.LocusIsPhased

				if (personLocusIsPhased == false){
					// Breakpoints are unnecessary
					// We either choose base 1 or 2
					// We must use the seeded generator so that the same 2 people always produce the same result

					useBase1 := (pseudorandomNumberGenerator.IntN(2) == 0)
					if (useBase1 == true){
						return true, personLocusBase1, nil
					}
					return true, personLocusBase2, nil
				}

				// We have a phased locus
				// We figure out which allele to use by seeing which allele gets inherited from our random breakpoints list

				// We figure out the chromosome and position of this locus

				locusMetadataExists, locusMetadataObject, err := locusMetadata.GetLocusMetadata(rsID)
				if (err != nil) { return false, "", err }
				if (locusMetadataExists == false){
					rsIDString := helpers.ConvertInt64ToString(rsID)
					return false, "", errors.New("getProspectiveOffspringGenomesList called with unknown rsID: " + rsIDString)
				}

				locusPosition := locusMetadataObject.Position
				locusChromosome := locusMetadataObject.Chromosome

				// Breakpoints are created once per chromosome for each person for each prospective offspring
				// All loci on the same chromosome share the same breakpoints, which is how linkage is simulated
				getPersonChromosomeBreakpointsList := func()([]int64, error){

					breakpointsList, exists := personBreakpointsMap[locusChromosome]
					if (exists == true){
						return breakpointsList, nil
					}

					// We have to create a new breakpoints list

					newBreakpointsList, err := getRandomChromosomeBreakpoints(locusChromosome)
					if (err != nil) { return nil, err }

					personBreakpointsMap[locusChromosome] = newBreakpointsList

					return newBreakpointsList, nil
				}

				personBreakpointsList, err := getPersonChromosomeBreakpointsList()
				if (err != nil) { return false, "", err }

				// This returns the index of the chromosome segment that the locus belongs to
				// Segment 0 is before the first breakpoint, segment 1 is between the first and second breakpoints, etc.
				getLocusListIndex := func()int{

					for index, breakpoint := range personBreakpointsList{

						if (int64(locusPosition) <= breakpoint){
							return index
						}
					}

					index := len(personBreakpointsList)

					// This is reached if the final breakpoint in the list is less than the locus's position, or if there were no breakpoints
					return index
				}

				locusListIndex := getLocusListIndex()

				// Each breakpoint switches which strand is inherited
				// NOTE(review): Segment 0 always inherits base 1, so loci before the first breakpoint
				//  are biased towards base 1. Consider randomizing the starting strand for each chromosome.

				if (locusListIndex%2 == 0){
					return true, personLocusBase1, nil
				}

				return true, personLocusBase2, nil
			}

			person1AlleleIsKnown, person1Allele, err := getPersonAllele(person1LociMap, person1ChromosomeBreakpointsMap)
			if (err != nil) { return false, nil, err }
			if (person1AlleleIsKnown == false){
				continue
			}

			person2AlleleIsKnown, person2Allele, err := getPersonAllele(person2LociMap, person2ChromosomeBreakpointsMap)
			if (err != nil) { return false, nil, err }
			if (person2AlleleIsKnown == false){
				continue
			}

			offspringLocusValue := locusValue.LocusValue{
				Base1Value: person1Allele,
				Base2Value: person2Allele,
				LocusIsPhased: true,
			}

			prospectiveOffspringGenome[rsID] = offspringLocusValue
		}

		if (len(prospectiveOffspringGenome) == 0){
			// We don't have any locations at which both people's genomes contain a locus value.
			// Locus availability does not depend on randomness, so this will be true for every prospective offspring.
			return false, nil, nil
		}

		offspringGenomesList = append(offspringGenomesList, prospectiveOffspringGenome)
	}

	return true, offspringGenomesList, nil
}
// We also use this function when calculating offspring probabilities between users in viewProfileGui.go // We also use this function when calculating offspring probabilities between users in viewProfileGui.go
//Outputs: //Outputs:
// -bool: Probability offspring has disease is known // -bool: Probability offspring has disease is known
@ -1271,40 +1384,181 @@ func GetOffspringPolygenicDiseaseLocusInfo(locusRiskWeightsMap map[string]int, l
//Outputs: //Outputs:
// -float64: Probability of offspring passing rule (0-1) // -bool: Any rules tested (if false, no offspring trait information is known)
// -int: Number of rules tested
// -map[[3]byte]int: Offspring probability of passing rules map
// Map Structure: Rule identifier -> Offspring probability of passing rule (0-100)
// -map[string]float64: Offspring average outcome scores map
// Map Structure: Outcome Name -> Offspring average outcome score
// -error // -error
func GetOffspringTraitRuleLocusInfo(locusRequiredBasePairsList []string, person1LocusBase1 string, person1LocusBase2 string, person2LocusBase1 string, person2LocusBase2 string)(float64, error){ func GetOffspringTraitInfo(traitObject traits.Trait, person1LocusValuesMap map[int64]locusValue.LocusValue, person2LocusValuesMap map[int64]locusValue.LocusValue)(bool, int, map[[3]byte]int, map[string]float64, error){
// We create the 4 options for the offspring's bases at this locus // First, we create 100 prospective offspring genomes.
offspringBasePairOutcome1 := person1LocusBase1 + ";" + person2LocusBase1 traitLociList := traitObject.LociList
offspringBasePairOutcome2 := person1LocusBase2 + ";" + person2LocusBase2
offspringBasePairOutcome3 := person1LocusBase1 + ";" + person2LocusBase2
offspringBasePairOutcome4 := person1LocusBase2 + ";" + person2LocusBase1
baseOutcomesList := []string{offspringBasePairOutcome1, offspringBasePairOutcome2, offspringBasePairOutcome3, offspringBasePairOutcome4} anyLocusValueExists, prospectiveOffspringGenomesList, err := getProspectiveOffspringGenomesList(traitLociList, person1LocusValuesMap, person2LocusValuesMap)
if (err != nil) { return false, 0, nil, nil, err }
numberOfOffspringOutcomesWhomPassRuleLocus := 0 if (anyLocusValueExists == false){
return false, 0, nil, nil, nil
for _, outcomeBasePair := range baseOutcomesList{
isValid := verifyBasePair(outcomeBasePair)
if (isValid == false){
return 0, errors.New("GetOffspringTraitRuleLocusInfo called with invalid locus base pair: " + outcomeBasePair)
} }
outcomePassesRuleLocus := slices.Contains(locusRequiredBasePairsList, outcomeBasePair) traitRulesList := traitObject.RulesList
if (outcomePassesRuleLocus == true){
numberOfOffspringOutcomesWhomPassRuleLocus += 1 // Map Structure: Rule Identifier -> Number of offspring who pass the rule (out of 100 prospective offspring)
offspringPassesRulesCountMap := make(map[[3]byte]int)
// We use this map to keep track of the rules for which we know every offspring's passes-rule status
// Map Structure: Rule Identifier -> Rule Object
offspringRulesWithKnownStatusMap := make(map[[3]byte]traits.TraitRule)
for offspringIndex, offspringGenomeMap := range prospectiveOffspringGenomesList{
// We iterate through rules to determine genome pair trait info
for _, ruleObject := range traitRulesList{
ruleIdentifierHex := ruleObject.RuleIdentifier
ruleIdentifier, err := encoding.DecodeHexStringTo3ByteArray(ruleIdentifierHex)
if (err != nil) { return false, 0, nil, nil, err }
if (offspringIndex != 0){
_, exists := offspringRulesWithKnownStatusMap[ruleIdentifier]
if (exists == false){
// We already tried to check a previous offspring's passes-rule status for this rule
// We know that the offspring's passes-rule status will be unknown for every prospective offspring
continue
} }
} }
offspringProbabilityOfPassingRuleLocus := float64(numberOfOffspringOutcomesWhomPassRuleLocus)/float64(4) // This is a list that describes the locus rsids and their values that must be fulfilled to pass the rule
ruleLocusObjectsList := ruleObject.LociList
return offspringProbabilityOfPassingRuleLocus, nil //Outputs:
// -bool: Offspring passes rule is known
// -bool: Offspring passes rule
getOffspringPassesRuleStatus := func()(bool, bool){
// If any rule locus status is unknown, then we consider the offspring-passes-rule status to be unknown,
// unless we know that there is a rule that the offspring does not pass
anyRuleIsUnknown := false
for _, ruleLocusObject := range ruleLocusObjectsList{
locusRSID := ruleLocusObject.LocusRSID
locusRequiredBasePairsList := ruleLocusObject.BasePairsList
offspringLocusValue, exists := offspringGenomeMap[locusRSID]
if (exists == false){
anyRuleIsUnknown = true
// We keep searching to see if there are any rules we know the offspring does not pass
continue
}
offspringBase1 := offspringLocusValue.Base1Value
offspringBase2 := offspringLocusValue.Base2Value
offspringBasePair := offspringBase1 + ";" + offspringBase2
offspringPassesRuleLocus := slices.Contains(locusRequiredBasePairsList, offspringBasePair)
if (offspringPassesRuleLocus == false){
// The offspring does not pass this rule locus
// Thus, the offspring does not pass the rule
return true, false
}
}
if (anyRuleIsUnknown == true){
// We don't know if the offspring passes the rule
return false, false
}
// The offspring passes the rule
return true, true
}
offspringPassesRuleIsKnown, offspringPassesRule := getOffspringPassesRuleStatus()
if (offspringPassesRuleIsKnown == false){
continue
}
offspringRulesWithKnownStatusMap[ruleIdentifier] = ruleObject
if (offspringPassesRule == true){
offspringPassesRulesCountMap[ruleIdentifier] += 1
}
}
}
// Map Structure: Rule Identifier -> Offspring Probability Of Passing Rule
// The map value stores the probability that the offspring will pass the rule
// This is a number between 0-100%
offspringProbabilityOfPassingRulesMap := make(map[[3]byte]int)
// Map Structure: Outcome Name -> Outcome Score
// Example: "Intolerant" -> 2.5
offspringAverageOutcomeScoresMap := make(map[string]float64)
for ruleIdentifier, ruleObject := range offspringRulesWithKnownStatusMap{
//Output:
// -int: Offspring probability of passing rule (0-100%)
getOffspringPercentageProbabilityOfPassingRule := func()int{
numberOfOffspringWhoPassRule, exists := offspringPassesRulesCountMap[ruleIdentifier]
if (exists == false){
// None of the offspring passed the rule
return 0
}
// There are 100 tested offspring
// Thus, the percentage of offspring who passed the rule is the same as the number of offspring who passed the rule
// The probability of the offspring passing the rule is the same as the percentage of offspring who passed the rule
return numberOfOffspringWhoPassRule
}
offspringPercentageProbabilityOfPassingRule := getOffspringPercentageProbabilityOfPassingRule()
offspringProbabilityOfPassingRulesMap[ruleIdentifier] = offspringPercentageProbabilityOfPassingRule
// This is the 0 - 1 probability value
offspringProbabilityOfPassingRule := float64(offspringPercentageProbabilityOfPassingRule)/100
ruleOutcomePointsMap := ruleObject.OutcomePointsMap
for outcomeName, outcomePointsEffect := range ruleOutcomePointsMap{
pointsToAdd := float64(outcomePointsEffect) * offspringProbabilityOfPassingRule
offspringAverageOutcomeScoresMap[outcomeName] += pointsToAdd
}
}
numberOfRulesTested := len(offspringProbabilityOfPassingRulesMap)
if (numberOfRulesTested == 0){
return false, 0, nil, nil, nil
}
traitOutcomesList := traitObject.OutcomesList
// We add all outcomes for which there were no points
for _, traitOutcome := range traitOutcomesList{
_, exists := offspringAverageOutcomeScoresMap[traitOutcome]
if (exists == false){
offspringAverageOutcomeScoresMap[traitOutcome] = 0
}
}
return true, numberOfRulesTested, offspringProbabilityOfPassingRulesMap, offspringAverageOutcomeScoresMap, nil
} }
// This function will retrieve the base pair of the locus from the input genome map // This function will retrieve the base pair of the locus from the input genome map
// We need this because each rsID has aliases, so we must sometimes check those aliases to find locus values // We need this because each rsID has aliases, so we must sometimes check those aliases to find locus values
// //
@ -1620,9 +1874,10 @@ func getPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRaw
} }
// At this point, we know that there are no homozygous variant mutations // At this point, we know that there are no homozygous variant mutations
// All variant mutations are heterozygous, meaning the other chromosome base is healthy // All variant mutations are heterozygous, meaning the other chromosome strand's base is healthy
// Probability is expressed as a float between 0 - 1 //Outputs:
// -bool: Person has disease
getPersonHasDiseaseBool := func()bool{ getPersonHasDiseaseBool := func()bool{
if (dominantOrRecessive == "Dominant"){ if (dominantOrRecessive == "Dominant"){
@ -1640,6 +1895,8 @@ func getPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRaw
return false return false
} }
// We know that there are at least 2 variants
if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){ if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){
// We know there is at least 1 variant mutation on each chromosome // We know there is at least 1 variant mutation on each chromosome
@ -1654,13 +1911,13 @@ func getPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRaw
return false return false
} }
if (numberOfVariants_Chromosome1 == 0 && numberOfVariants_Chromosome2 == 0){ // We know there are at least 2 variants
// We know there is at least 1 variant whose phase is unknown
// All variants have an unknown phase, and we know there are multiple of them. // If all mutations are on the same chromosome, the person does not have the disease.
// The person does not have the disease if all mutations are on the same chromosome // If at least 1 mutation exists on each chromosome, the person does have the disease.
// The person does have the disease if at least 1 mutation exists on each chromosome // Either way, we don't know enough to say if the person has the disease.
// Either way, we don't know enough to say if the person has the disease // We will report that they do not, because their genome does not conclusively say that they do.
// We will report that they do not, because their genome does not conclusively say that they do
// This is why phased genomes are useful and provide a more accurate reading // This is why phased genomes are useful and provide a more accurate reading
// TODO: Explain this to the user in the GUI // TODO: Explain this to the user in the GUI
// We must explain that unphased genomes will not detect disease sometimes // We must explain that unphased genomes will not detect disease sometimes
@ -1668,26 +1925,60 @@ func getPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRaw
return false return false
} }
// We know that there is at least 1 variant whose phase is known personHasDiseaseBool := getPersonHasDiseaseBool()
// We know that there are no variants whose phase is known which exist on both chromosomes
// We know there are at least some variants whose phase is unknown
// The person has the disease if the unknown-phase variants exist on the same chromosome as the ones which we know exist do // Output:
// This is the same as the last if statement, we report false even though they may actually have the disease // -float64: Probability person will pass a disease variant to their offspring (0-1)
getPersonWillPassVariantProbability := func()float64{
return false if (totalNumberOfVariants == 1){
// There is only 1 variant on any chromosome
// The probability of the person passing a variant is 50%.
return 0.5
} }
personHasDiseaseBool := getPersonHasDiseaseBool() // We know that there are at least 2 variants
if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){
// We know there is at least 1 variant mutation on each chromosome
// Therefore, the person will definitely pass a variant
return 1
}
if (numberOfVariants_UnknownChromosome == 0){
// We know that variants do not exist on both chromosomes, only on 1.
// Thus, the person has a 50% probability of passing a variant
return 0.5
}
// We know all variants are heterozygous // We know all variants are heterozygous
// Probability person will not pass any of n variants to their offspring: 1/(2^n) // From Wikipeia:
// Probability person will pass at least 1 of n variants to their offspring: 1 - (1/(2^n)) // The human genome contains somewhere between 19,000 and 20,000 protein-coding genes.
// These genes contain an average of 10 introns and the average size of an intron is about 6 kb (6,000 base pairs)
// This means that the average size of a protein-coding gene is about 62 kb (62,000 base pairs)
probabilityPersonWillPassAnyVariant := 1 - (1/(math.Pow(2, float64(totalNumberOfVariants)))) // The probability of a recombination breakpoint occurring within the gene is very small
// If there is 1 breakpoint every 100 million locations, on average, and each gene is 62,000 base pairs long,
// then the probability of a breakpoint occurring within a gene is 62,000/100,000,000 = 0.00062 = .062%
// Thus, we disregard the risk of a breakpoint occurring within a gene
// I also read somewhere that breakpoints are less likely to occur within genes, which makes this likelihood even smaller
return personHasDiseaseBool, probabilityPersonWillPassAnyVariant, nil // At this point, we know there are at least 2 variants
// We know that at least 1 of the variants has an unknown phase
// We don't know if all of the variants belong to the same chromosome
// If variants exist on both chromosomes, then the probability of passing a variant is 100%
// If all variants exist on the same chromosome, then the probability of passing a variant is 50%
// We know there is at least a 50% chance of passing a variant, and possibly higher
return 0.5
}
personWillPassVariantProbability := getPersonWillPassVariantProbability()
return personHasDiseaseBool, personWillPassVariantProbability, nil
} }
personHasDisease, probabilityPersonWillPassAnyVariant, err := getPersonDiseaseInfo() personHasDisease, probabilityPersonWillPassAnyVariant, err := getPersonDiseaseInfo()
@ -1771,27 +2062,10 @@ func getPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRaw
for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{ for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{
// Outputs:
// -bool: Bases are known
// -bool: Base1 Has Variant
// -bool: Base2 Has Variant
getGenomeBasesInfo := func()(bool, bool, bool){
variantsInfoMap := currentGenomeDiseaseAnalysisObject.VariantsInfoMap variantsInfoMap := currentGenomeDiseaseAnalysisObject.VariantsInfoMap
variantInfoObject, exists := variantsInfoMap[variantIdentifier] variantInfoObject, exists := variantsInfoMap[variantIdentifier]
if (exists == false){ if (exists == false){
return false, false, false
}
currentBase1HasVariant := variantInfoObject.Base1HasVariant
currentBase2HasVariant := variantInfoObject.Base2HasVariant
return true, currentBase1HasVariant, currentBase2HasVariant
}
basesAreKnown, currentBase1HasVariant, currentBase2HasVariant := getGenomeBasesInfo()
if (basesAreKnown == false){
if (firstItemReached == true){ if (firstItemReached == true){
// A previous genome has information for this variant, and the current one does not // A previous genome has information for this variant, and the current one does not
return true, nil return true, nil
@ -1799,6 +2073,9 @@ func getPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRaw
continue continue
} }
currentBase1HasVariant := variantInfoObject.Base1HasVariant
currentBase2HasVariant := variantInfoObject.Base2HasVariant
if (firstItemReached == false){ if (firstItemReached == false){
base1HasVariant = currentBase1HasVariant base1HasVariant = currentBase1HasVariant
base2HasVariant = currentBase2HasVariant base2HasVariant = currentBase2HasVariant

View file

@ -320,72 +320,75 @@ func GetLocusMetadataObjectsListByChromosome(chromosome int)([]LocusMetadata, er
// -[]byte: File bytes // -[]byte: File bytes
getFileBytes := func()(bool, []byte){ getFileBytes := func()(bool, []byte){
if (chromosome == 1){ switch chromosome{
case 1:{
return true, LocusMetadataFile_Chromosome1 return true, LocusMetadataFile_Chromosome1
} }
if (chromosome == 2){ case 2:{
return true, LocusMetadataFile_Chromosome2 return true, LocusMetadataFile_Chromosome2
} }
if (chromosome == 3){ case 3:{
return true, LocusMetadataFile_Chromosome3 return true, LocusMetadataFile_Chromosome3
} }
if (chromosome == 4){ case 4:{
return true, LocusMetadataFile_Chromosome4 return true, LocusMetadataFile_Chromosome4
} }
if (chromosome == 5){ case 5:{
return true, LocusMetadataFile_Chromosome5 return true, LocusMetadataFile_Chromosome5
} }
if (chromosome == 6){ case 6:{
return true, LocusMetadataFile_Chromosome6 return true, LocusMetadataFile_Chromosome6
} }
if (chromosome == 7){ case 7:{
return true, LocusMetadataFile_Chromosome7 return true, LocusMetadataFile_Chromosome7
} }
if (chromosome == 8){ case 8:{
return true, LocusMetadataFile_Chromosome8 return true, LocusMetadataFile_Chromosome8
} }
if (chromosome == 9){ case 9:{
return true, LocusMetadataFile_Chromosome9 return true, LocusMetadataFile_Chromosome9
} }
if (chromosome == 10){ case 10:{
return true, LocusMetadataFile_Chromosome10 return true, LocusMetadataFile_Chromosome10
} }
if (chromosome == 11){ case 11:{
return true, LocusMetadataFile_Chromosome11 return true, LocusMetadataFile_Chromosome11
} }
if (chromosome == 12){ case 12:{
return true, LocusMetadataFile_Chromosome12 return true, LocusMetadataFile_Chromosome12
} }
if (chromosome == 13){ case 13:{
return true, LocusMetadataFile_Chromosome13 return true, LocusMetadataFile_Chromosome13
} }
if (chromosome == 14){ case 14:{
return true, LocusMetadataFile_Chromosome14 return true, LocusMetadataFile_Chromosome14
} }
if (chromosome == 15){ case 15:{
return true, LocusMetadataFile_Chromosome15 return true, LocusMetadataFile_Chromosome15
} }
if (chromosome == 16){ case 16:{
return true, LocusMetadataFile_Chromosome16 return true, LocusMetadataFile_Chromosome16
} }
if (chromosome == 17){ case 17:{
return true, LocusMetadataFile_Chromosome17 return true, LocusMetadataFile_Chromosome17
} }
//if (chromosome == 18){ //case 18:{
// return true, LocusMetadataFile_Chromosome18 // return true, LocusMetadataFile_Chromosome18
//} //}
if (chromosome == 19){ case 19:{
return true, LocusMetadataFile_Chromosome19 return true, LocusMetadataFile_Chromosome19
} }
if (chromosome == 20){ case 20:{
return true, LocusMetadataFile_Chromosome20 return true, LocusMetadataFile_Chromosome20
} }
if (chromosome == 21){ case 21:{
return true, LocusMetadataFile_Chromosome21 return true, LocusMetadataFile_Chromosome21
} }
if (chromosome == 22){ case 22:{
return true, LocusMetadataFile_Chromosome22 return true, LocusMetadataFile_Chromosome22
} }
}
return false, nil return false, nil
} }