diff --git a/Changelog.md b/Changelog.md index 2e775c8..26698ab 100644 --- a/Changelog.md +++ b/Changelog.md @@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log. ## Unversioned Changes +* Split the createGeneticAnalysis package into 2 packages: createPersonGeneticAnalysis and createCoupleGeneticAnalysis. - *Simon Sarasova* * Upgraded Circl to version 1.3.9. - *Simon Sarasova* * Renamed BroadcastTime/SentTime to CreationTime for every instance in which it is a better term to use. - *Simon Sarasova* * Added "User Has Disease" information to the View Profile - Monogenic Diseases page. - *Simon Sarasova* diff --git a/Contributors.md b/Contributors.md index baf69ed..929ce17 100644 --- a/Contributors.md +++ b/Contributors.md @@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th Name | Date Of First Commit | Number Of Commits --- | --- | --- -Simon Sarasova | June 13, 2023 | 255 \ No newline at end of file +Simon Sarasova | June 13, 2023 | 256 \ No newline at end of file diff --git a/documentation/Future-Plans.md b/documentation/Future-Plans.md index 6a894ff..2dd1a93 100644 --- a/documentation/Future-Plans.md +++ b/documentation/Future-Plans.md @@ -307,7 +307,7 @@ The current method for predicting polygenic disease risks and traits is not as i Our current model adds and subtracts the likelihood values of various SNPs that are reported to have an effect on polygenic diseases and traits. -A much better method is to train a neural net to predict traits and polygenic diseases on a large number of genes. There are methods that exist to find the set of genes that have an effect on each trait/disease. For example, height is said to be effected by ~10,000 SNPs. Many GWAS studies exist which report which genes are responsible for certain traits and diseases. These are the genes to feed into the neural net for each trait/disease. These are also the genes that users will share in their profiles. 
I have already started to try to build this system. See `geneticPrediction.go` for an implementation of trait prediction using neural networks, and `createGeneticAnalysis.go` for information on how offspring predictions would work. +A much better method is to train a neural net to predict traits and polygenic diseases on a large number of genes. There are methods that exist to find the set of genes that have an effect on each trait/disease. For example, height is said to be affected by ~10,000 SNPs. Many GWAS studies exist which report which genes are responsible for certain traits and diseases. These are the genes to feed into the neural net for each trait/disease. These are also the genes that users will share in their profiles. I have already started to try to build this system. See `geneticPrediction.go` for an implementation of trait prediction using neural networks, and `createCoupleGeneticAnalysis.go` for information on how offspring predictions would work. This method requires training data, which is largely unavailable for public use. We need fully open training data, not data that requires registration or permission to download. 
diff --git a/gui/viewProfileGui.go b/gui/viewProfileGui.go index 44d2ad8..c85121a 100644 --- a/gui/viewProfileGui.go +++ b/gui/viewProfileGui.go @@ -19,7 +19,8 @@ import "seekia/resources/geneticReferences/traits" import "seekia/internal/appMemory" import "seekia/internal/encoding" import "seekia/internal/genetics/companyAnalysis" -import "seekia/internal/genetics/createGeneticAnalysis" +import "seekia/internal/genetics/createCoupleGeneticAnalysis" +import "seekia/internal/genetics/createPersonGeneticAnalysis" import "seekia/internal/genetics/locusValue" import "seekia/internal/genetics/myChosenAnalysis" import "seekia/internal/genetics/myPeople" @@ -2731,7 +2732,7 @@ func setViewMateProfilePage_MonogenicDiseases(window fyne.Window, userOrOffsprin myDiseaseInfoExists, myProbabilityOfPassingAVariant, myNumberOfVariantsTested, err := getMyDiseaseInfo() if (err != nil) { return nil, err } - probabilityOffspringHasDiseaseIsKnown, probabilityOffspringHasDisease, probabilityOffspringHasVariantIsKnown, probabilityOffspringHasVariant, err := createGeneticAnalysis.GetOffspringMonogenicDiseaseProbabilities(diseaseIsDominantOrRecessive, userDiseaseInfoExists, userProbabilityOfPassingAVariant, myDiseaseInfoExists, myProbabilityOfPassingAVariant) + probabilityOffspringHasDiseaseIsKnown, probabilityOffspringHasDisease, probabilityOffspringHasVariantIsKnown, probabilityOffspringHasVariant, err := createCoupleGeneticAnalysis.GetOffspringMonogenicDiseaseProbabilities(diseaseIsDominantOrRecessive, userDiseaseInfoExists, userProbabilityOfPassingAVariant, myDiseaseInfoExists, myProbabilityOfPassingAVariant) if (err != nil) { return nil, err } getUserHasDiseaseString := func()string{ @@ -3010,7 +3011,7 @@ func setViewMateProfilePage_PolygenicDiseases(window fyne.Window, userOrOffsprin userDiseaseLocusValuesMap[locusRSID] = userLocusValue } - userDiseaseInfoIsKnown, userDiseaseRiskScore, userNumberOfLociTested, _, err := 
createGeneticAnalysis.GetPersonGenomePolygenicDiseaseInfo(diseaseLociList, userDiseaseLocusValuesMap, true) + userDiseaseInfoIsKnown, userDiseaseRiskScore, userNumberOfLociTested, _, err := createPersonGeneticAnalysis.GetPersonGenomePolygenicDiseaseInfo(diseaseLociList, userDiseaseLocusValuesMap, true) if (err != nil) { return nil, err } getUserDiseaseRiskScoreString := func()(string, error){ @@ -3030,7 +3031,7 @@ func setViewMateProfilePage_PolygenicDiseases(window fyne.Window, userOrOffsprin userDiseaseRiskScoreString, err := getUserDiseaseRiskScoreString() if (err != nil) { return nil, err } - anyOffspringLociTested, offspringDiseaseRiskScore, offspringNumberOfLociTested, _, offspringSampleRiskScoresList, err := createGeneticAnalysis.GetOffspringPolygenicDiseaseInfo(diseaseLociList, myDiseaseLocusValuesMap, userDiseaseLocusValuesMap) + anyOffspringLociTested, offspringDiseaseRiskScore, offspringNumberOfLociTested, _, offspringSampleRiskScoresList, err := createCoupleGeneticAnalysis.GetOffspringPolygenicDiseaseInfo(diseaseLociList, myDiseaseLocusValuesMap, userDiseaseLocusValuesMap) if (err != nil) { return nil, err } getOffspringDiseaseRiskScoreFormatted := func()(string, error){ @@ -3241,13 +3242,13 @@ func setViewMateProfilePage_PolygenicDiseaseLoci(window fyne.Window, diseaseName return } - anyUserLociTested, _, userNumberOfLociTested, userDiseaseLocusInfoMap, err := createGeneticAnalysis.GetPersonGenomePolygenicDiseaseInfo(diseaseLocusObjectsList, userDiseaseLocusValuesMap, true) + anyUserLociTested, _, userNumberOfLociTested, userDiseaseLocusInfoMap, err := createPersonGeneticAnalysis.GetPersonGenomePolygenicDiseaseInfo(diseaseLocusObjectsList, userDiseaseLocusValuesMap, true) if (err != nil) { setErrorEncounteredPage(window, err, previousPage) return } - anyOffspringLociTested, _, offspringNumberOfLociTested, offspringLociInfoMap, _, err := createGeneticAnalysis.GetOffspringPolygenicDiseaseInfo(diseaseLocusObjectsList, myDiseaseLocusValuesMap, 
userDiseaseLocusValuesMap) + anyOffspringLociTested, _, offspringNumberOfLociTested, offspringLociInfoMap, _, err := createCoupleGeneticAnalysis.GetOffspringPolygenicDiseaseInfo(diseaseLocusObjectsList, myDiseaseLocusValuesMap, userDiseaseLocusValuesMap) if (err != nil) { setErrorEncounteredPage(window, err, previousPage) return @@ -3648,7 +3649,7 @@ func setViewMateProfilePage_GeneticTraits(window fyne.Window, userOrOffspring st ruleLociList := traitRuleObject.LociList - userRuleStatusIsKnown, userPassesRule, err := createGeneticAnalysis.GetGenomePassesTraitRuleStatus(ruleLociList, userTraitLocusValuesMap, true) + userRuleStatusIsKnown, userPassesRule, err := createPersonGeneticAnalysis.GetGenomePassesTraitRuleStatus(ruleLociList, userTraitLocusValuesMap, true) if (err != nil) { return false, nil, 0, err } if (userRuleStatusIsKnown == false){ continue @@ -3700,7 +3701,7 @@ func setViewMateProfilePage_GeneticTraits(window fyne.Window, userOrOffspring st myTraitLocusValuesMap, _, _, _, _, err := readGeneticAnalysis.GetPersonTraitInfoFromGeneticAnalysis(myAnalysisObject, traitName, myGenomeIdentifier) if (err != nil) { return false, nil, 0, err } - anyRuleTested, offspringNumberOfRulesTested, _, offspringAverageOutcomeScoresMap, err := createGeneticAnalysis.GetOffspringTraitInfo(traitObject, myTraitLocusValuesMap, userTraitLocusValuesMap) + anyRuleTested, offspringNumberOfRulesTested, _, offspringAverageOutcomeScoresMap, err := createCoupleGeneticAnalysis.GetOffspringTraitInfo(traitObject, myTraitLocusValuesMap, userTraitLocusValuesMap) if (err != nil) { return false, nil, 0, err } if (anyRuleTested == false){ return false, nil, 0, nil @@ -3962,7 +3963,7 @@ func setViewMateProfilePage_TraitRules(window fyne.Window, traitName string, use return false, nil, nil } - anyOffspringRulesTested, _, offspringProbabilityOfPassingRulesMap, _, err := createGeneticAnalysis.GetOffspringTraitInfo(traitObject, myTraitLocusValuesMap, userTraitLocusValuesMap) + 
anyOffspringRulesTested, _, offspringProbabilityOfPassingRulesMap, _, err := createCoupleGeneticAnalysis.GetOffspringTraitInfo(traitObject, myTraitLocusValuesMap, userTraitLocusValuesMap) if (err != nil) { return false, nil, err } if (anyOffspringRulesTested == false){ return false, nil, nil @@ -3999,7 +4000,7 @@ func setViewMateProfilePage_TraitRules(window fyne.Window, traitName string, use ruleLociList := ruleObject.LociList - ruleStatusIsKnown, _, err := createGeneticAnalysis.GetGenomePassesTraitRuleStatus(ruleLociList, userTraitLocusValuesMap, true) + ruleStatusIsKnown, _, err := createPersonGeneticAnalysis.GetGenomePassesTraitRuleStatus(ruleLociList, userTraitLocusValuesMap, true) if (err != nil) { return 0, err } if (ruleStatusIsKnown == true){ numberOfRulesTested += 1 @@ -4092,7 +4093,7 @@ func setViewMateProfilePage_TraitRules(window fyne.Window, traitName string, use getUserPassesRuleString := func()(string, error){ - userRuleStatusIsKnown, userPassesRule, err := createGeneticAnalysis.GetGenomePassesTraitRuleStatus(ruleLociList, userTraitLocusValuesMap, true) + userRuleStatusIsKnown, userPassesRule, err := createPersonGeneticAnalysis.GetGenomePassesTraitRuleStatus(ruleLociList, userTraitLocusValuesMap, true) if (err != nil) { return "", err } if (userRuleStatusIsKnown == false){ diff --git a/internal/genetics/createGeneticAnalysis/createGeneticAnalysis.go b/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis.go similarity index 59% rename from internal/genetics/createGeneticAnalysis/createGeneticAnalysis.go rename to internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis.go index b99104a..bfeeaab 100644 --- a/internal/genetics/createGeneticAnalysis/createGeneticAnalysis.go +++ b/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis.go @@ -1,11 +1,11 @@ -// createGeneticAnalysis provides functions to create a genetic analysis -// These are performed on one or more genome files. 
-// They produce 3 kinds of results: Monogenic Diseases, Polygenic Diseases and Traits -// They can be performed on a Person or a Couple -// Couple analyses provide an analysis of the prospective offspring of the couple +// createCoupleGeneticAnalysis provides functions to create a Couple genetic analysis +// Couple analyses provide an analysis of the prospective offspring of a pair of people +// These analyses are performed on one or more genome files. +// Analyses contain 3 categories of results: Monogenic Diseases, Polygenic Diseases and Traits +// Use createPersonGeneticAnalysis.go to create Person analyses -package createGeneticAnalysis +package createCoupleGeneticAnalysis // Disclaimer: I am a novice in the ways of genetics. This package could be flawed in numerous ways. @@ -24,16 +24,13 @@ package createGeneticAnalysis // More offspring will take longer, but will yield a more accurate trait probability. // Seekia will show the the average trait result and a chart showing the trait results for all created offspring. -// TODO: Add the ability to weight different genome files based on their reliability. -// Some files are much more accurate because they record each location many times. 
- - import "seekia/resources/geneticReferences/locusMetadata" import "seekia/resources/geneticReferences/monogenicDiseases" import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" import "seekia/internal/encoding" +import "seekia/internal/genetics/createPersonGeneticAnalysis" import "seekia/internal/genetics/geneticAnalysis" import "seekia/internal/genetics/locusValue" import "seekia/internal/genetics/prepareRawGenomes" @@ -41,135 +38,10 @@ import "seekia/internal/helpers" import "errors" import mathRand "math/rand/v2" -import "strings" import "slices" import "maps" -func verifyBasePair(inputBasePair string)bool{ - - base1, base2, delimiterFound := strings.Cut(inputBasePair, ";") - if (delimiterFound == false){ - return false - } - - // I = Insertion - // D = Deletion - - validBasesList := []string{"C", "A", "T", "G", "I", "D"} - - baseIsValid := slices.Contains(validBasesList, base1) - if (baseIsValid == false){ - return false - } - - baseIsValid = slices.Contains(validBasesList, base2) - if (baseIsValid == false){ - return false - } - - return true -} - -//Outputs: -// -bool: Process completed (it was not stopped manually before completion) -// -string: New Genetic analysis string (Encoded in MessagePack) -// -error -func CreatePersonGeneticAnalysis(genomesList []prepareRawGenomes.RawGenomeWithMetadata, updatePercentageCompleteFunction func(int)error, checkIfProcessIsStopped func()bool)(bool, string, error){ - - prepareRawGenomesUpdatePercentageCompleteFunction := func(newPercentage int)error{ - - newPercentageCompletion, err := helpers.ScaleNumberProportionally(true, newPercentage, 0, 100, 0, 50) - if (err != nil){ return err } - - err = updatePercentageCompleteFunction(newPercentageCompletion) - if (err != nil) { return err } - - return nil - } - - genomesWithMetadataList, allRawGenomeIdentifiersList, multipleGenomesExist, onlyExcludeConflictsGenomeIdentifier, onlyIncludeSharedGenomeIdentifier, err := 
prepareRawGenomes.GetGenomesWithMetadataListFromRawGenomesList(genomesList, prepareRawGenomesUpdatePercentageCompleteFunction) - if (err != nil) { return false, "", err } - - newGeneticAnalysisObject := geneticAnalysis.PersonAnalysis{ - AnalysisVersion: 1, - CombinedGenomesExist: multipleGenomesExist, - AllRawGenomeIdentifiersList: allRawGenomeIdentifiersList, - } - - if (multipleGenomesExist == true){ - - newGeneticAnalysisObject.OnlyExcludeConflictsGenomeIdentifier = onlyExcludeConflictsGenomeIdentifier - newGeneticAnalysisObject.OnlyIncludeSharedGenomeIdentifier = onlyIncludeSharedGenomeIdentifier - } - - processIsStopped := checkIfProcessIsStopped() - if (processIsStopped == true){ - return false, "", nil - } - - monogenicDiseasesList, err := monogenicDiseases.GetMonogenicDiseaseObjectsList() - if (err != nil) { return false, "", err } - - // Map Structure: Disease Name -> PersonMonogenicDiseaseInfo - analysisMonogenicDiseasesMap := make(map[string]geneticAnalysis.PersonMonogenicDiseaseInfo) - - for _, monogenicDiseaseObject := range monogenicDiseasesList{ - - diseaseName := monogenicDiseaseObject.DiseaseName - - personDiseaseAnalysisObject, err := getPersonMonogenicDiseaseAnalysis(genomesWithMetadataList, monogenicDiseaseObject) - if (err != nil) { return false, "", err } - - analysisMonogenicDiseasesMap[diseaseName] = personDiseaseAnalysisObject - } - - newGeneticAnalysisObject.MonogenicDiseasesMap = analysisMonogenicDiseasesMap - - polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList() - if (err != nil) { return false, "", err } - - // Map Structure: Disease Name -> PersonPolygenicDiseaseInfo - analysisPolygenicDiseasesMap := make(map[string]geneticAnalysis.PersonPolygenicDiseaseInfo) - - for _, diseaseObject := range polygenicDiseaseObjectsList{ - - personDiseaseAnalysisObject, err := getPersonPolygenicDiseaseAnalysis(genomesWithMetadataList, diseaseObject) - if (err != nil) { return false, "", err } - - diseaseName := 
diseaseObject.DiseaseName - - analysisPolygenicDiseasesMap[diseaseName] = personDiseaseAnalysisObject - } - - newGeneticAnalysisObject.PolygenicDiseasesMap = analysisPolygenicDiseasesMap - - traitObjectsList, err := traits.GetTraitObjectsList() - if (err != nil) { return false, "", err } - - // Map Structure: Trait Name -> PersonTraitInfo - analysisTraitsMap := make(map[string]geneticAnalysis.PersonTraitInfo) - - for _, traitObject := range traitObjectsList{ - - personTraitAnalysisObject, err := getPersonTraitAnalysis(genomesWithMetadataList, traitObject) - if (err != nil) { return false, "", err } - - traitName := traitObject.TraitName - - analysisTraitsMap[traitName] = personTraitAnalysisObject - } - - newGeneticAnalysisObject.TraitsMap = analysisTraitsMap - - analysisBytes, err := encoding.EncodeMessagePackBytes(newGeneticAnalysisObject) - if (err != nil) { return false, "", err } - - analysisString := string(analysisBytes) - - return true, analysisString, nil -} - //Outputs: // -bool: Process completed (was not manually stopped mid-way) // -string: Couple genetic analysis string (encoded in MessagePack) @@ -294,10 +166,10 @@ func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenom variantsList := diseaseObject.VariantsList diseaseIsDominantOrRecessive := diseaseObject.DominantOrRecessive - person1DiseaseAnalysisObject, err := getPersonMonogenicDiseaseAnalysis(person1GenomesWithMetadataList, diseaseObject) + person1DiseaseAnalysisObject, err := createPersonGeneticAnalysis.GetPersonMonogenicDiseaseAnalysis(person1GenomesWithMetadataList, diseaseObject) if (err != nil) { return false, "", err } - person2DiseaseAnalysisObject, err := getPersonMonogenicDiseaseAnalysis(person2GenomesWithMetadataList, diseaseObject) + person2DiseaseAnalysisObject, err := createPersonGeneticAnalysis.GetPersonMonogenicDiseaseAnalysis(person2GenomesWithMetadataList, diseaseObject) if (err != nil) { return false, "", err } // This map stores the number of variants 
tested in each person's genome @@ -608,10 +480,10 @@ func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenom diseaseName := diseaseObject.DiseaseName diseaseLociList := diseaseObject.LociList - person1DiseaseAnalysisObject, err := getPersonPolygenicDiseaseAnalysis(person1GenomesWithMetadataList, diseaseObject) + person1DiseaseAnalysisObject, err := createPersonGeneticAnalysis.GetPersonPolygenicDiseaseAnalysis(person1GenomesWithMetadataList, diseaseObject) if (err != nil) { return false, "", err } - person2DiseaseAnalysisObject, err := getPersonPolygenicDiseaseAnalysis(person2GenomesWithMetadataList, diseaseObject) + person2DiseaseAnalysisObject, err := createPersonGeneticAnalysis.GetPersonPolygenicDiseaseAnalysis(person2GenomesWithMetadataList, diseaseObject) if (err != nil) { return false, "", err } // This map stores the polygenic disease info for each genome pair @@ -756,10 +628,10 @@ func CreateCoupleGeneticAnalysis(person1GenomesList []prepareRawGenomes.RawGenom traitName := traitObject.TraitName - person1TraitAnalysisObject, err := getPersonTraitAnalysis(person1GenomesWithMetadataList, traitObject) + person1TraitAnalysisObject, err := createPersonGeneticAnalysis.GetPersonTraitAnalysis(person1GenomesWithMetadataList, traitObject) if (err != nil) { return false, "", err } - person2TraitAnalysisObject, err := getPersonTraitAnalysis(person2GenomesWithMetadataList, traitObject) + person2TraitAnalysisObject, err := createPersonGeneticAnalysis.GetPersonTraitAnalysis(person2GenomesWithMetadataList, traitObject) if (err != nil) { return false, "", err } // This map stores the trait info for each genome pair @@ -1001,6 +873,11 @@ func GetOffspringPolygenicDiseaseInfo_Fast(diseaseLociList []polygenicDiseases.D return false, 0, 0, nil } + // I = Insertion + // D = Deletion + + validAllelesList := []string{"C", "A", "T", "G", "I", "D"} + numberOfLociTested := 0 offspringSummedRiskWeights := 0 @@ -1014,20 +891,37 @@ func 
GetOffspringPolygenicDiseaseInfo_Fast(diseaseLociList []polygenicDiseases.D locusMinimumWeight := locusObject.MinimumRiskWeight locusMaximumWeight := locusObject.MaximumRiskWeight - person1LocusValueFound, person1LocusBase1Value, person1LocusBase2Value, _, _, err := getLocusValueFromGenomeMap(true, person1LocusValuesMap, locusRSID) + person1LocusValueFound, person1LocusBase1Value, person1LocusBase2Value, _, _, err := createPersonGeneticAnalysis.GetLocusValueFromGenomeMap(true, person1LocusValuesMap, locusRSID) if (err != nil) { return false, 0, 0, err } if (person1LocusValueFound == false){ // None of the offspring will have a value for this locus continue } - person2LocusValueFound, person2LocusBase1Value, person2LocusBase2Value, _, _, err := getLocusValueFromGenomeMap(true, person2LocusValuesMap, locusRSID) + person2LocusValueFound, person2LocusBase1Value, person2LocusBase2Value, _, _, err := createPersonGeneticAnalysis.GetLocusValueFromGenomeMap(true, person2LocusValuesMap, locusRSID) if (err != nil) { return false, 0, 0, err } if (person2LocusValueFound == false){ // None of the offspring will have a value for this locus continue } + baseIsValid := slices.Contains(validAllelesList, person1LocusBase1Value) + if (baseIsValid == false){ + return false, 0, 0, errors.New("GetOffspringPolygenicDiseaseInfo_Fast called with genomeMap containing invalid locus value base: " + person1LocusBase1Value) + } + baseIsValid = slices.Contains(validAllelesList, person1LocusBase2Value) + if (baseIsValid == false){ + return false, 0, 0, errors.New("GetOffspringPolygenicDiseaseInfo_Fast called with genomeMap containing invalid locus value base: " + person1LocusBase2Value) + } + baseIsValid = slices.Contains(validAllelesList, person2LocusBase1Value) + if (baseIsValid == false){ + return false, 0, 0, errors.New("GetOffspringPolygenicDiseaseInfo_Fast called with genomeMap containing invalid locus value base: " + person2LocusBase1Value) + } + baseIsValid = 
slices.Contains(validAllelesList, person2LocusBase2Value) + if (baseIsValid == false){ + return false, 0, 0, errors.New("GetOffspringPolygenicDiseaseInfo_Fast called with genomeMap containing invalid locus value base: " + person2LocusBase2Value) + } + numberOfLociTested += 1 offspringBasePairOutcome1 := person1LocusBase1Value + ";" + person2LocusBase1Value @@ -1041,11 +935,6 @@ func GetOffspringPolygenicDiseaseInfo_Fast(diseaseLociList []polygenicDiseases.D for _, outcomeBasePair := range baseOutcomesList{ - isValid := verifyBasePair(outcomeBasePair) - if (isValid == false){ - return false, 0, 0, errors.New("GetOffspringPolygenicDiseaseInfo_Fast called with genomeMap containing invalid locus value base pair: " + outcomeBasePair) - } - offspringOutcomeRiskWeight, exists := locusRiskWeightsMap[outcomeBasePair] if (exists == false){ // We do not know the risk weight for this base pair @@ -1075,6 +964,7 @@ func GetOffspringPolygenicDiseaseInfo_Fast(diseaseLociList []polygenicDiseases.D return true, offspringAverageDiseaseRiskScore, numberOfLociTested, nil } + //Outputs: // -bool: Any loci tested (if false, no offspring polygenic disease information is known) // -int: Offspring Risk Score (Value between 0-10) @@ -1160,14 +1050,14 @@ func GetOffspringPolygenicDiseaseInfo(diseaseLociList []polygenicDiseases.Diseas locusMinimumWeight := locusObject.MinimumRiskWeight locusMaximumWeight := locusObject.MaximumRiskWeight - basePairValueFound, locusBase1Value, locusBase2Value, _, _, err := getLocusValueFromGenomeMap(true, offspringGenomeMap, locusRSID) + basePairValueFound, locusBase1Value, locusBase2Value, _, _, err := createPersonGeneticAnalysis.GetLocusValueFromGenomeMap(true, offspringGenomeMap, locusRSID) if (err != nil) { return false, 0, 0, nil, nil, err } if (basePairValueFound == false){ // None of the offspring will have a value for this locus continue } - locusRiskWeight, locusOddsRatioIsKnown, locusOddsRatio, err := 
getGenomePolygenicDiseaseLocusRiskInfo(locusRiskWeightsMap, locusOddsRatiosMap, locusBase1Value, locusBase2Value) + locusRiskWeight, locusOddsRatioIsKnown, locusOddsRatio, err := createPersonGeneticAnalysis.GetGenomePolygenicDiseaseLocusRiskInfo(locusRiskWeightsMap, locusOddsRatiosMap, locusBase1Value, locusBase2Value) if (err != nil) { return false, 0, 0, nil, nil, err } offspringLocusInfoSumsObject.SummedLocusRiskWeights += locusRiskWeight @@ -1308,7 +1198,7 @@ func GetOffspringTraitInfo(traitObject traits.Trait, person1LocusValuesMap map[i // This is a list that describes the locus rsids and their values that must be fulfilled to pass the rule ruleLocusObjectsList := ruleObject.LociList - offspringPassesRuleIsKnown, offspringPassesRule, err := GetGenomePassesTraitRuleStatus(ruleLocusObjectsList, offspringGenomeMap, false) + offspringPassesRuleIsKnown, offspringPassesRule, err := createPersonGeneticAnalysis.GetGenomePassesTraitRuleStatus(ruleLocusObjectsList, offspringGenomeMap, false) if (err != nil){ return false, 0, nil, nil, err } if (offspringPassesRuleIsKnown == false){ continue @@ -1656,1026 +1546,4 @@ func getProspectiveOffspringGenomesList(lociList []int64, person1LociMap map[int return true, offspringGenomesList, nil } -//Outputs: -// -geneticAnalysis.PersonMonogenicDiseaseInfo: Monogenic disease analysis object -// -error -func getPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, diseaseObject monogenicDiseases.MonogenicDisease)(geneticAnalysis.PersonMonogenicDiseaseInfo, error){ - emptyDiseaseInfoObject := geneticAnalysis.PersonMonogenicDiseaseInfo{} - - dominantOrRecessive := diseaseObject.DominantOrRecessive - variantsList := diseaseObject.VariantsList - - // We use this map to keep track of which RSIDs corresponds to each variant - // We also use it to have a map of all variants for the disease - // Map Structure: Variant Identifier -> []rsID - variantRSIDsMap := make(map[[3]byte][]int64) - - // 
This map stores all rsIDs for this monogenic disease - // These are locations in the disease's gene which, if mutated, are known to cause the disease - // We use this map to avoid duplicate rsIDs, because one rsID can have multiple variants which belong to it - // We also store all alias rsIDs in this map - allRSIDsMap := make(map[int64]struct{}) - - for _, variantObject := range variantsList{ - - variantIdentifierHex := variantObject.VariantIdentifier - - variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex) - if (err != nil) { return emptyDiseaseInfoObject, err } - - variantRSID := variantObject.VariantRSID - - variantRSIDsList := []int64{variantRSID} - - // We add aliases to variantRSIDsList - - anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(variantRSID) - if (err != nil) { return emptyDiseaseInfoObject, err } - if (anyAliasesExist == true){ - variantRSIDsList = append(variantRSIDsList, rsidAliasesList...) - } - - variantRSIDsMap[variantIdentifier] = variantRSIDsList - - for _, rsID := range variantRSIDsList{ - allRSIDsMap[rsID] = struct{}{} - } - } - - // Now we create a new map without any rsID aliases - // Each rsID in this map represents a unique locus on the genome - // Each rsID may have aliases, but they are not included in this map - allUniqueRSIDsMap := make(map[int64]struct{}) - - for rsID, _ := range allRSIDsMap{ - - anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(rsID) - if (err != nil) { return emptyDiseaseInfoObject, err } - if (anyAliasesExist == false){ - allUniqueRSIDsMap[rsID] = struct{}{} - continue - } - - // We see if we already added an alias of this rsID to the map - - checkIfAliasAlreadyExists := func()bool{ - - for _, rsIDAlias := range rsidAliasesList{ - _, exists := allUniqueRSIDsMap[rsIDAlias] - if (exists == true){ - return true - } - } - return false - } - - aliasAlreadyExists := checkIfAliasAlreadyExists() - if (aliasAlreadyExists == true){ - // We already 
added this alias - continue - } - allUniqueRSIDsMap[rsID] = struct{}{} - } - - // Map Structure: Genome Identifier -> PersonGenomeMonogenicDiseaseInfo - monogenicDiseaseInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomeMonogenicDiseaseInfo) - - for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{ - - genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier - genomeMap := genomeWithMetadataObject.GenomeMap - - // This stores all variant info for this genome - // Map Structure: Variant Identifier -> PersonGenomeMonogenicDiseaseVariantInfo - variantsInfoMap := make(map[[3]byte]geneticAnalysis.PersonGenomeMonogenicDiseaseVariantInfo) - - for _, variantObject := range variantsList{ - - variantIdentifierHex := variantObject.VariantIdentifier - - variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex) - if (err != nil) { return emptyDiseaseInfoObject, err } - - variantRSID := variantObject.VariantRSID - - basePairValueFound, base1Value, base2Value, locusIsPhased, _, err := getLocusValueFromGenomeMap(true, genomeMap, variantRSID) - if (err != nil) { return emptyDiseaseInfoObject, err } - if (basePairValueFound == false){ - - // This genome does not contain info for this variant - // We skip it - continue - } - - // This genome has at least 1 variant - - variantDefectiveBase := variantObject.DefectiveBase - - getBaseIsVariantMutationBool := func(inputBase string)bool{ - - if (inputBase == variantDefectiveBase){ - return true - } - // Base could be mutated to a different unhealthy base - // That mutation could be a neutral/healthier change - // We only care about this specific variant - return false - } - - base1IsDefective := getBaseIsVariantMutationBool(base1Value) - base2IsDefective := getBaseIsVariantMutationBool(base2Value) - - newDiseaseVariantInfoObject := geneticAnalysis.PersonGenomeMonogenicDiseaseVariantInfo{ - Base1HasVariant: base1IsDefective, - Base2HasVariant: base2IsDefective, - LocusIsPhased: 
locusIsPhased, - } - - variantsInfoMap[variantIdentifier] = newDiseaseVariantInfoObject - - //TODO: Add LocusIsPhased to readGeneticAnalysis package - } - - // We are done adding variant information for the genome - // Now we determine probability that user will pass a disease variant to offspring, and if the user has the disease - - numberOfVariantsTested := len(variantsInfoMap) - - if (numberOfVariantsTested == 0){ - // We don't know anything about this genome's disease risk for this disease - // We won't add any information to the map - continue - } - - // This stores the number of loci that were tested - // Each locus can have multiple potential variants - numberOfLociTested := 0 - - // This stores the number of tested loci that are phased - // A higher number means that the results are more potentially more accurate - // It is only more accurate if multiple heterozygous variants on seperate loci exist. - numberOfPhasedLoci := 0 - - for rsID, _ := range allUniqueRSIDsMap{ - - locusValueExists, _, _, locusIsPhased, _, err := getLocusValueFromGenomeMap(true, genomeMap, rsID) - if (err != nil) { return emptyDiseaseInfoObject, err } - if (locusValueExists == false){ - continue - } - - numberOfLociTested += 1 - - if (locusIsPhased == true){ - numberOfPhasedLoci += 1 - } - } - - // Outputs: - // -bool: Person has disease - // -float64: Probability Person will pass a defect (variant) to offspring (0-1) - // -error - getPersonDiseaseInfo := func()(bool, float64, error){ - - // These variables are used to count the number of defective variants that exist on each chromosome - numberOfVariants_Chromosome1 := 0 - numberOfVariants_Chromosome2 := 0 - numberOfVariants_UnknownChromosome := 0 - - // We use this map to keep track of how many mutations exist for each rsID - // This allows us to know if 2 different variant mutations exist for a single rsID - // For example, base1 is a different deleterious mutation than base2 - // If this ever happens, we know that the user has 
the disease, - // because both copies of the gene locus are defective. - rsidMutationsMap := make(map[int64]int) - - for variantIdentifier, variantInfoObject := range variantsInfoMap{ - - locusIsPhasedStatus := variantInfoObject.LocusIsPhased - - base1HasVariant := variantInfoObject.Base1HasVariant - base2HasVariant := variantInfoObject.Base2HasVariant - - if (base1HasVariant == false && base2HasVariant == false){ - // Neither chromosome contains the variant mutation. - continue - } - - if (base1HasVariant == true && base2HasVariant == true){ - // Both chromosomes contain the same variant mutation. - // Person has the disease. - // Person will definitely pass disease variant to offspring. - return true, 1, nil - } - - // We know that this variant exists on 1 of the bases, but not both. - - variantRSIDsList, exists := variantRSIDsMap[variantIdentifier] - if (exists == false){ - return false, 0, errors.New("variantRSIDsMap missing variantIdentifier.") - } - - for _, rsID := range variantRSIDsList{ - rsidMutationsMap[rsID] += 1 - } - - if (locusIsPhasedStatus == true){ - - if (base1HasVariant == true){ - numberOfVariants_Chromosome1 += 1 - } - if (base2HasVariant == true){ - numberOfVariants_Chromosome2 += 1 - } - } else { - - if (base1HasVariant == true || base2HasVariant == true){ - numberOfVariants_UnknownChromosome += 1 - } - } - } - - totalNumberOfVariants := numberOfVariants_Chromosome1 + numberOfVariants_Chromosome2 + numberOfVariants_UnknownChromosome - - if (totalNumberOfVariants == 0){ - // Person does not have any disease variants. 
- // They do not have the disease, and have no chance of passing a disease variant - return false, 0, nil - } - - // Now we check to see if there are any loci which have 2 different variants, one for each base - - for _, numberOfMutations := range rsidMutationsMap{ - - if (numberOfMutations >= 2){ - // Person has 2 mutations on the same location - // They must have the disease, and will definitely pass a variant to their offspring - return true, 1, nil - } - } - - // At this point, we know that there are no homozygous variant mutations - // All variant mutations are heterozygous, meaning the other chromosome strand's base is healthy - - //Outputs: - // -bool: Person has disease - getPersonHasDiseaseBool := func()bool{ - - if (dominantOrRecessive == "Dominant"){ - // Only 1 variant is needed for the person to have the disease - // We know they have at least 1 variant - return true - } - - // dominantOrRecessive == "Recessive" - - if (totalNumberOfVariants == 1){ - // There is only 1 variant in total. - // This single variant cannot exist on both chromosomes. - // The person does not have the disease - return false - } - - // We know that there are at least 2 variants - - if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){ - - // We know there is at least 1 variant mutation on each chromosome - // Therefore, the person has the disease - return true - } - - if (numberOfVariants_UnknownChromosome == 0){ - - // We know that variants do not exist on both chromosomes, only on 1. - // Thus, the person does not have the disease - return false - } - - // We know there are at least 2 variants - // We know there is at least 1 variant whose phase is unknown - - // If all mutations are on the same chromosome, the person does not have the disease. - // If at least 1 mutation exists on each chromosome, the person does have the disease. - // Either way, we don't know enough to say if the person has the disease. 
- // We will report that they do not, because their genome does not conclusively say that they do. - // This is why phased genomes are useful and provide a more accurate reading - // TODO: Explain this to the user in the GUI - // We must explain that unphased genomes will not detect disease sometimes - - return false - } - - personHasDiseaseBool := getPersonHasDiseaseBool() - - // Output: - // -float64: Probability person will pass a disease variant to their offspring (0-1) - getPersonWillPassVariantProbability := func()float64{ - - if (totalNumberOfVariants == 1){ - - // There is only 1 variant on any chromosome - // The probability of the person passing a variant is 50%. - return 0.5 - } - - // We know that there are at least 2 variants - - if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){ - - // We know there is at least 1 variant mutation on each chromosome - // Therefore, the person will definitely pass a variant - return 1 - } - if (numberOfVariants_UnknownChromosome == 0){ - - // We know that variants do not exist on both chromosomes, only on 1. - // Thus, the person has a 50% probability of passing a variant - return 0.5 - } - - // We know all variants are heterozygous - - // From Wikipeia: - // The human genome contains somewhere between 19,000 and 20,000 protein-coding genes. 
- // These genes contain an average of 10 introns and the average size of an intron is about 6 kb (6,000 base pairs) - // This means that the average size of a protein-coding gene is about 62 kb (62,000 base pairs) - - // The probability of a recombination breakpoint occurring within the gene is very small - // If there is 1 breakpoint every 100 million locations, on average, and each gene is 62,000 base pairs long, - // then the probability of a breakpoint occurring within a gene is 62,000/100,000,000 = 0.00062 = .062% - // Thus, we disregard the risk of a breakpoint occurring within a gene - // I also read somewhere that breakpoints are less likely to occurr within genes, which makes this likelihood even smaller - - // At this point, we know there at at least 2 variants - // We know that at least 1 of the variants has an unknown phase - // We don't know if all of the variants belong to the same chromosome - // If variants exist on both chromosomes, then the probability of passing a variant is 100% - // If all variants exist on the same chromosome, then the probability of passing a variant is 50% - // We know there is at least a 50% chance of passing a variant, and possibly higher - - return 0.5 - } - - personWillPassVariantProbability := getPersonWillPassVariantProbability() - - return personHasDiseaseBool, personWillPassVariantProbability, nil - } - - personHasDisease, probabilityPersonWillPassAnyVariant, err := getPersonDiseaseInfo() - if (err != nil) { return emptyDiseaseInfoObject, err } - - percentageProbabilityPersonWillPassADiseaseVariant := int(probabilityPersonWillPassAnyVariant * 100) - - diseaseAnalysisObject := geneticAnalysis.PersonGenomeMonogenicDiseaseInfo{ - PersonHasDisease: personHasDisease, - NumberOfVariantsTested: numberOfVariantsTested, - NumberOfLociTested: numberOfLociTested, - NumberOfPhasedLoci: numberOfPhasedLoci, - ProbabilityOfPassingADiseaseVariant: percentageProbabilityPersonWillPassADiseaseVariant, - VariantsInfoMap: 
variantsInfoMap, - } - - monogenicDiseaseInfoMap[genomeIdentifier] = diseaseAnalysisObject - } - - personMonogenicDiseaseInfoObject := geneticAnalysis.PersonMonogenicDiseaseInfo{ - MonogenicDiseaseInfoMap: monogenicDiseaseInfoMap, - } - - if (len(monogenicDiseaseInfoMap) <= 1){ - // We do not need to check for conflicts, there is only <=1 genome with disease information - // Nothing left to do. Analysis is complete. - return personMonogenicDiseaseInfoObject, nil - } - - // We check for conflicts - - getConflictExistsBool := func()(bool, error){ - - firstItemReached := false - - personHasDisease := false - probabilityOfPassingAVariant := 0 - - for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{ - - currentGenomePersonHasDisease := currentGenomeDiseaseAnalysisObject.PersonHasDisease - currentGenomeProbabilityOfPassingAVariant := currentGenomeDiseaseAnalysisObject.ProbabilityOfPassingADiseaseVariant - - if (firstItemReached == false){ - personHasDisease = currentGenomePersonHasDisease - probabilityOfPassingAVariant = currentGenomeProbabilityOfPassingAVariant - firstItemReached = true - continue - } - - if (currentGenomePersonHasDisease != personHasDisease){ - return true, nil - } - if (currentGenomeProbabilityOfPassingAVariant != probabilityOfPassingAVariant){ - return true, nil - } - } - - // Now we test variants for conflicts - // We are only doing this to see if there are variants which one genome has and another doesn't - // For example, the analysis results say that you have a 50% chance of passing a variant for both genomes, but - // they have detected a different variant for each genome. 
- // This means that your real risk of passing a variant may actually be higher, and you are more likely to have the disease too - - for variantIdentifier, _ := range variantRSIDsMap{ - - // Each variant base pair is either true/false, true/true, false/false, false/true - - // Two different genomes have true/false and false/true, it will not count as a conflict - // If the locus is unphased, then there is no difference between true/false and false/true - // If the locus is phased, then this flip is only meaningful if it effects the probability of disease/passing a variant - // We already checked those probabilities for conflicts earlier - // Therefore, any flip is not considered a conflict - // We only care about conflicts where 1 genome says you have a variant and the other says you don't, or - // one says you have only 1 mutation and the other says you have 2 at that location - - firstItemReached := false - - base1HasVariant := false - base2HasVariant := false - - for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{ - - variantsInfoMap := currentGenomeDiseaseAnalysisObject.VariantsInfoMap - - variantInfoObject, exists := variantsInfoMap[variantIdentifier] - if (exists == false){ - if (firstItemReached == true){ - // A previous genome has information for this variant, and the current one does not - return true, nil - } - continue - } - - currentBase1HasVariant := variantInfoObject.Base1HasVariant - currentBase2HasVariant := variantInfoObject.Base2HasVariant - - if (firstItemReached == false){ - base1HasVariant = currentBase1HasVariant - base2HasVariant = currentBase2HasVariant - firstItemReached = true - continue - } - - if (base1HasVariant == currentBase1HasVariant && base2HasVariant == currentBase2HasVariant){ - // No conflict exists - continue - } - if (base1HasVariant == currentBase2HasVariant && base2HasVariant == currentBase1HasVariant){ - // We don't count this as a conflict - continue - } - - // A conflict exists - return true, nil - 
} - } - - return false, nil - } - - conflictExists, err := getConflictExistsBool() - if (err != nil) { return emptyDiseaseInfoObject, err } - - personMonogenicDiseaseInfoObject.ConflictExists = conflictExists - - return personMonogenicDiseaseInfoObject, nil -} - - -//Outputs: -// -bool: Any loci tested -// -int: Person genome risk score (value between 0-10) -// -int: Person Genome Number of loci tested -// -map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo: Person disease locus info map -// Map Structure: Locus Identifier -> PersonGenomePolygenicDiseaseLocusInfo -// -error -func GetPersonGenomePolygenicDiseaseInfo(diseaseLociList []polygenicDiseases.DiseaseLocus, personLocusValuesMap map[int64]locusValue.LocusValue, lookForLocusAliases bool)(bool, int, int, map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo, error){ - - if (len(personLocusValuesMap) == 0){ - return false, 0, 0, nil, nil - } - - // Map Structure: Locus Identifier -> PersonGenomePolygenicDiseaseLocusInfo - genomeLociInfoMap := make(map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo) - - summedDiseaseRiskWeight := 0 - - minimumPossibleRiskWeightSum := 0 - maximumPossibleRiskWeightSum := 0 - - for _, locusObject := range diseaseLociList{ - - locusRSID := locusObject.LocusRSID - locusRiskWeightsMap := locusObject.RiskWeightsMap - locusOddsRatiosMap := locusObject.OddsRatiosMap - locusMinimumWeight := locusObject.MinimumRiskWeight - locusMaximumWeight := locusObject.MaximumRiskWeight - - locusValueFound, locusBase1Value, locusBase2Value, _, _, err := getLocusValueFromGenomeMap(lookForLocusAliases, personLocusValuesMap, locusRSID) - if (err != nil) { return false, 0, 0, nil, err } - if (locusValueFound == false){ - continue - } - - locusRiskWeight, locusOddsRatioIsKnown, locusOddsRatio, err := getGenomePolygenicDiseaseLocusRiskInfo(locusRiskWeightsMap, locusOddsRatiosMap, locusBase1Value, locusBase2Value) - if (err != nil) { return false, 0, 0, nil, err } - - 
newLocusInfoObject := geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo{ - RiskWeight: locusRiskWeight, - OddsRatioIsKnown: locusOddsRatioIsKnown, - } - - if (locusOddsRatioIsKnown == true){ - newLocusInfoObject.OddsRatio = locusOddsRatio - } - - locusIdentifierHex := locusObject.LocusIdentifier - - locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex) - if (err != nil) { return false, 0, 0, nil, err } - - genomeLociInfoMap[locusIdentifier] = newLocusInfoObject - - minimumPossibleRiskWeightSum += locusMinimumWeight - maximumPossibleRiskWeightSum += locusMaximumWeight - - summedDiseaseRiskWeight += locusRiskWeight - } - - numberOfLociTested := len(genomeLociInfoMap) - if (numberOfLociTested == 0){ - // We have no information about this disease for this genome - return false, 0, 0, nil, nil - } - - diseaseRiskScore, err := helpers.ScaleNumberProportionally(true, summedDiseaseRiskWeight, minimumPossibleRiskWeightSum, maximumPossibleRiskWeightSum, 0, 10) - if (err != nil) { return false, 0, 0, nil, err } - - return true, diseaseRiskScore, numberOfLociTested, genomeLociInfoMap, nil -} - - -//Outputs: -// -geneticAnalysis.PersonPolygenicDiseaseInfo -// -error -func getPersonPolygenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, diseaseObject polygenicDiseases.PolygenicDisease)(geneticAnalysis.PersonPolygenicDiseaseInfo, error){ - - // We use this when returning errors - emptyDiseaseInfoObject := geneticAnalysis.PersonPolygenicDiseaseInfo{} - - diseaseLociList := diseaseObject.LociList - - // This map stores the polygenic disease for each of the person's genomes - // Map Structure: Genome Identifier -> PersonGenomePolygenicDiseaseInfo - personPolygenicDiseaseInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomePolygenicDiseaseInfo) - - // We construct polygenic disease probability info for each genome - - for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{ - - genomeIdentifier := 
genomeWithMetadataObject.GenomeIdentifier - genomeMap := genomeWithMetadataObject.GenomeMap - - // This map stores the loci for this disease and does not contain loci which do not belong to this disease - // Map Structure: rsID -> Locus Value - genomeLocusValuesMap := make(map[int64]locusValue.LocusValue) - - for _, locusObject := range diseaseLociList{ - - locusRSID := locusObject.LocusRSID - - locusValueFound, _, _, _, locusValueObject, err := getLocusValueFromGenomeMap(true, genomeMap, locusRSID) - if (err != nil) { return emptyDiseaseInfoObject, err } - if (locusValueFound == false){ - continue - } - - genomeLocusValuesMap[locusRSID] = locusValueObject - } - - anyLociTested, personDiseaseRiskScore, genomeNumberOfLociTested, genomeLociInfoMap, err := GetPersonGenomePolygenicDiseaseInfo(diseaseLociList, genomeLocusValuesMap, false) - if (err != nil) { return emptyDiseaseInfoObject, err } - if (anyLociTested == false){ - continue - } - - newDiseaseInfoObject := geneticAnalysis.PersonGenomePolygenicDiseaseInfo{ - NumberOfLociTested: genomeNumberOfLociTested, - RiskScore: personDiseaseRiskScore, - LocusValuesMap: genomeLocusValuesMap, - LociInfoMap: genomeLociInfoMap, - } - - personPolygenicDiseaseInfoMap[genomeIdentifier] = newDiseaseInfoObject - } - - newPersonPolygenicDiseaseInfoObject := geneticAnalysis.PersonPolygenicDiseaseInfo{ - PolygenicDiseaseInfoMap: personPolygenicDiseaseInfoMap, - } - - if (len(personPolygenicDiseaseInfoMap) <= 1){ - // We do not need to check for conflicts, there is only <=1 genome with disease information - // Nothing left to do. Analysis is complete. 
- return newPersonPolygenicDiseaseInfoObject, nil - } - - // We check for conflicts between the different genome's results - - getConflictExistsBool := func()(bool, error){ - - // First we check to see if any of the genomes have different risk scores or NumberOfLociTested - - genomeRiskScore := 0 - genomeNumberOfLociTested := 0 - - firstItemReached := false - - for _, personGenomeDiseaseInfoObject := range personPolygenicDiseaseInfoMap{ - - currentGenomeRiskScore := personGenomeDiseaseInfoObject.RiskScore - currentGenomeNumberOfLociTested := personGenomeDiseaseInfoObject.NumberOfLociTested - - if (firstItemReached == false){ - genomeRiskScore = currentGenomeRiskScore - genomeNumberOfLociTested = currentGenomeNumberOfLociTested - firstItemReached = true - continue - } - - if (genomeRiskScore != currentGenomeRiskScore){ - return true, nil - } - if (genomeNumberOfLociTested != currentGenomeNumberOfLociTested){ - return true, nil - } - } - - // Now we check for conflicts between the different locus values - // We consider a conflict any time the same locus has different weights/odds ratios - // We don't care if the loci have different base pair values, so long as those base pairs have the same risk weights/odds ratios - - for _, locusObject := range diseaseLociList{ - - locusIdentifierHex := locusObject.LocusIdentifier - - locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex) - if (err != nil) { return false, err } - - locusRiskWeight := 0 - locusOddsRatio := float64(0) - - firstItemReached := false - - for _, personGenomeDiseaseInfoObject := range personPolygenicDiseaseInfoMap{ - - genomeLociInfoMap := personGenomeDiseaseInfoObject.LociInfoMap - - genomeLocusObject, exists := genomeLociInfoMap[locusIdentifier] - if (exists == false){ - if (firstItemReached == true){ - // A previous genome has information for this locus, and the current one does not - return true, nil - } - continue - } - - genomeLocusRiskWeight := 
genomeLocusObject.RiskWeight - genomeLocusOddsRatio := genomeLocusObject.OddsRatio - - if (firstItemReached == false){ - locusRiskWeight = genomeLocusRiskWeight - locusOddsRatio = genomeLocusOddsRatio - firstItemReached = true - continue - } - if (locusRiskWeight == genomeLocusRiskWeight && locusOddsRatio == genomeLocusOddsRatio){ - // No conflict exists for this locus on the genomes we have already checked - continue - } - - // Conflict exists - return true, nil - } - } - - return false, nil - } - - conflictExists, err := getConflictExistsBool() - if (err != nil) { return emptyDiseaseInfoObject, err } - - newPersonPolygenicDiseaseInfoObject.ConflictExists = conflictExists - - return newPersonPolygenicDiseaseInfoObject, nil -} - - - -//Outputs: -// -geneticAnalysis.PersonTraitInfo: Trait analysis object -// -error -func getPersonTraitAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, traitObject traits.Trait)(geneticAnalysis.PersonTraitInfo, error){ - - // We use this when returning errors - emptyPersonTraitInfo := geneticAnalysis.PersonTraitInfo{} - - traitLociList := traitObject.LociList - traitRulesList := traitObject.RulesList - - // Map Structure: Genome Identifier -> PersonGenomeTraitInfo - newPersonTraitInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomeTraitInfo) - - for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{ - - genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier - genomeMap := genomeWithMetadataObject.GenomeMap - - // This map contains the locus values for the genome - // If an locus's entry doesn't exist, its value is unknown - // Map Structure: Locus rsID -> Locus Value - genomeLocusValuesMap := make(map[int64]locusValue.LocusValue) - - for _, locusRSID := range traitLociList{ - - locusBasePairKnown, _, _, _, locusValueObject, err := getLocusValueFromGenomeMap(true, genomeMap, locusRSID) - if (err != nil) { return emptyPersonTraitInfo, err } - if (locusBasePairKnown == false){ - 
continue - } - - genomeLocusValuesMap[locusRSID] = locusValueObject - } - - // This map contains the trait outcome scores for the genome - // Map Structure: Outcome Name -> Score - // Example: "Intolerant" -> 5 - traitOutcomeScoresMap := make(map[string]int) - - // Map Structure: Rule Identifier -> Genome Passes rule (true if the genome passes the rule) - personPassesRulesMap := make(map[[3]byte]bool) - - if (len(traitRulesList) != 0){ - - // At least 1 rule exists for this trait - - for _, ruleObject := range traitRulesList{ - - ruleIdentifierHex := ruleObject.RuleIdentifier - - ruleIdentifier, err := encoding.DecodeHexStringTo3ByteArray(ruleIdentifierHex) - if (err != nil) { return emptyPersonTraitInfo, err } - - ruleLociList := ruleObject.LociList - - genomePassesRuleIsKnown, genomePassesRule, err := GetGenomePassesTraitRuleStatus(ruleLociList, genomeMap, false) - if (err != nil) { return emptyPersonTraitInfo, err } - if (genomePassesRuleIsKnown == false){ - continue - } - - personPassesRulesMap[ruleIdentifier] = genomePassesRule - - // The rule has been passed by this genome - // We add the outcome points for the rule to the traitOutcomeScoresMap - - ruleOutcomePointsMap := ruleObject.OutcomePointsMap - - for traitOutcome, pointsChange := range ruleOutcomePointsMap{ - - traitOutcomeScoresMap[traitOutcome] += pointsChange - } - } - } - - traitOutcomesList := traitObject.OutcomesList - - // We add all outcomes for which there were no points - - for _, traitOutcome := range traitOutcomesList{ - - _, exists := traitOutcomeScoresMap[traitOutcome] - if (exists == false){ - traitOutcomeScoresMap[traitOutcome] = 0 - } - } - - numberOfRulesTested := len(personPassesRulesMap) - - newPersonGenomeTraitInfo := geneticAnalysis.PersonGenomeTraitInfo{ - NumberOfRulesTested: numberOfRulesTested, - LocusValuesMap: genomeLocusValuesMap, - OutcomeScoresMap: traitOutcomeScoresMap, - GenomePassesRulesMap: personPassesRulesMap, - } - - newPersonTraitInfoMap[genomeIdentifier] = 
newPersonGenomeTraitInfo - } - - newPersonTraitInfoObject := geneticAnalysis.PersonTraitInfo{ - TraitInfoMap: newPersonTraitInfoMap, - } - - if (len(newPersonTraitInfoMap) <= 1){ - // We do not need to check for conflicts, there is only <=1 genome with trait information - // Nothing left to do. Analysis is complete. - return newPersonTraitInfoObject, nil - } - - // We check for conflicts - - getConflictExistsBool := func()(bool, error){ - - //TODO: Check for locus value conflicts once locus values are used in neural network prediction. - - if (len(traitRulesList) == 0){ - return false, nil - } - - // We check to see if the outcome scores are the same for all genomes - // We also check each rule result - - firstItemReached := false - - outcomeScoresMap := make(map[string]int) - passesRulesMap := make(map[[3]byte]bool) - - for _, genomeTraitInfoObject := range newPersonTraitInfoMap{ - - currentGenomeOutcomeScoresMap := genomeTraitInfoObject.OutcomeScoresMap - currentGenomePassesRulesMap := genomeTraitInfoObject.GenomePassesRulesMap - - if (firstItemReached == false){ - outcomeScoresMap = currentGenomeOutcomeScoresMap - passesRulesMap = currentGenomePassesRulesMap - firstItemReached = true - continue - } - - areEqual := maps.Equal(currentGenomeOutcomeScoresMap, outcomeScoresMap) - if (areEqual == false){ - // A conflict exists - return true, nil - } - areEqual = maps.Equal(currentGenomePassesRulesMap, passesRulesMap) - if (areEqual == false){ - // A conflict exists - return true, nil - } - } - - return false, nil - } - - conflictExists, err := getConflictExistsBool() - if (err != nil) { return emptyPersonTraitInfo, err } - - newPersonTraitInfoObject.ConflictExists = conflictExists - - return newPersonTraitInfoObject, nil -} - - -//Outputs: -// -int: Base pair disease locus risk weight -// -bool: Base pair disease locus odds ratio known -// -float64: Base pair disease locus odds ratio -// -error -func getGenomePolygenicDiseaseLocusRiskInfo(locusRiskWeightsMap 
map[string]int, locusOddsRatiosMap map[string]float64, locusBase1Value string, locusBase2Value string)(int, bool, float64, error){ - - locusBasePairJoined := locusBase1Value + ";" + locusBase2Value - - riskWeight, exists := locusRiskWeightsMap[locusBasePairJoined] - if (exists == false){ - // This is an unknown base combination - // We will treat it as a 0 risk weight - return 0, true, 1, nil - } - - if (riskWeight == 0){ - return 0, true, 1, nil - } - - oddsRatio, exists := locusOddsRatiosMap[locusBasePairJoined] - if (exists == false){ - return riskWeight, false, 0, nil - } - - return riskWeight, true, oddsRatio, nil -} - -// This function checks to see if a genome will pass a trait rule -// Outputs: -// -bool: Genome passes trait rule status is known -// -bool: Genome passes trait rule -// -error -func GetGenomePassesTraitRuleStatus(ruleLociList []traits.RuleLocus, genomeMap map[int64]locusValue.LocusValue, checkForAliases bool)(bool, bool, error){ - - // We check to see if genome passes all rule loci - // To pass a rule, all of the rule's loci must be passed by the provided genome - // We consider a rule Known if the genome either passes all loci, or fails to pass 1 locus - // We consider a rule Unknown if any loci are unknown, and there are no rules which are known not to be passed - - anyLocusIsUnknown := false - - for _, locusObject := range ruleLociList{ - - locusRSID := locusObject.LocusRSID - - locusBasePairKnown, locusBase1, locusBase2, _, _, err := getLocusValueFromGenomeMap(checkForAliases, genomeMap, locusRSID) - if (err != nil) { return false, false, err } - if (locusBasePairKnown == false){ - anyLocusIsUnknown = true - // We keep searching to see if any of the rule's loci are known to not pass - continue - } - - locusBasePairJoined := locusBase1 + ";" + locusBase2 - - locusBasePairsList := locusObject.BasePairsList - - genomePassesRuleLocus := slices.Contains(locusBasePairsList, locusBasePairJoined) - if (genomePassesRuleLocus == false){ - // The 
genome has failed to pass a single rule locus, thus, the rule is not passed - return true, false, nil - } - } - - if (anyLocusIsUnknown == true){ - // The rule is not passed, but it's status is unknown - // There were no rules which were known not to pass - return false, false, nil - } - - // All rules were passed - - return true, true, nil -} - - -// This function will retrieve the base pair of the locus from the input genome map -// We use this function because each rsID has aliases, so we must sometimes check those aliases to find locus values -// -// Outputs: -// -bool: Valid base pair value found -// -string: Base 1 Value (Nucleotide base for the SNP) -// -string: Base 2 Value (Nucleotide base for the SNP) -// -bool: Locus base pair is phased -// -locusValue.LocusValue -// -error -func getLocusValueFromGenomeMap(checkForAliases bool, inputGenomeMap map[int64]locusValue.LocusValue, locusRSID int64)(bool, string, string, bool, locusValue.LocusValue, error){ - - // Outputs: - // -bool: Locus value found - // -locusValue.LocusValue - // -error - getLocusValue := func()(bool, locusValue.LocusValue, error){ - - currentLocusValue, exists := inputGenomeMap[locusRSID] - if (exists == true){ - return true, currentLocusValue, nil - } - - if (checkForAliases == false){ - return false, locusValue.LocusValue{}, nil - } - - // We check for aliases - - anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(locusRSID) - if (err != nil) { return false, locusValue.LocusValue{}, err } - if (anyAliasesExist == false){ - return false, locusValue.LocusValue{}, nil - } - - for _, rsidAlias := range rsidAliasesList{ - - currentLocusValue, exists := inputGenomeMap[rsidAlias] - if (exists == true){ - return true, currentLocusValue, nil - } - } - - return false, locusValue.LocusValue{}, nil - } - - locusValueFound, locusValueObject, err := getLocusValue() - if (err != nil) { return false, "", "", false, locusValue.LocusValue{}, err } - if (locusValueFound == false){ - 
return false, "", "", false, locusValue.LocusValue{}, nil - } - - base1Value := locusValueObject.Base1Value - base2Value := locusValueObject.Base2Value - locusIsPhased := locusValueObject.LocusIsPhased - - return true, base1Value, base2Value, locusIsPhased, locusValueObject, nil -} diff --git a/internal/genetics/createGeneticAnalysis/createGeneticAnalysis_test.go b/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go similarity index 62% rename from internal/genetics/createGeneticAnalysis/createGeneticAnalysis_test.go rename to internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go index 7410e2d..c042297 100644 --- a/internal/genetics/createGeneticAnalysis/createGeneticAnalysis_test.go +++ b/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go @@ -1,6 +1,6 @@ -package createGeneticAnalysis_test +package createCoupleGeneticAnalysis_test -import "seekia/internal/genetics/createGeneticAnalysis" +import "seekia/internal/genetics/createCoupleGeneticAnalysis" import "seekia/internal/genetics/readGeneticAnalysis" @@ -16,150 +16,6 @@ import "seekia/internal/helpers" import "testing" import "errors" -func TestCreatePersonGeneticAnalysis_SingleGenome(t *testing.T){ - - err := locusMetadata.InitializeLocusMetadataVariables() - if (err != nil) { - t.Fatalf("InitializeLocusMetadataVariables failed: " + err.Error()) - } - - monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() - traits.InitializeTraitVariables() - - genomeIdentifier, err := helpers.GetNewRandom16ByteArray() - if (err != nil) { - t.Fatalf("Failed to get random 16 byte array: " + err.Error()) - } - - fakeRawGenome, _, _, _, err := createRawGenomes.CreateFakeRawGenome_AncestryDNA() - if (err != nil) { - t.Fatalf("Failed to create fake raw AncestryDNA genome: " + err.Error()) - } - - genomeIsValid, rawGenomeWithMetadata, err := 
prepareRawGenomes.CreateRawGenomeWithMetadataObject(genomeIdentifier, fakeRawGenome) - if (err != nil){ - t.Fatalf("CreateRawGenomeWithMetadataObject failed: " + err.Error()) - } - if (genomeIsValid == false){ - t.Fatalf("CreateRawGenomeWithMetadataObject failed: Genome is not valid.") - } - - genomesList := []prepareRawGenomes.RawGenomeWithMetadata{rawGenomeWithMetadata} - - updateProgressFunction := func(_ int)error{ - return nil - } - - checkIfProcessIsStoppedFunction := func()bool{ - return false - } - - processCompleted, personGeneticAnalysis, err := createGeneticAnalysis.CreatePersonGeneticAnalysis(genomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) - if (err != nil){ - t.Fatalf("Failed to create person genetic analysis: " + err.Error()) - } - if (processCompleted == false){ - t.Fatalf("Failed to create person genetic analysis: Process did not complete.") - } - - personGeneticAnalysisObject, err := readGeneticAnalysis.ReadPersonGeneticAnalysisString(personGeneticAnalysis) - if (err != nil){ - t.Fatalf("Failed to read person genetic analysis string: " + err.Error()) - } - - err = readGeneticAnalysis.VerifyPersonGeneticAnalysis(personGeneticAnalysisObject) - if (err != nil){ - t.Fatalf("Failed to read person genetic analysis: " + err.Error()) - } -} - - -func TestCreatePersonGeneticAnalysis_MultipleGenomes(t *testing.T){ - - err := locusMetadata.InitializeLocusMetadataVariables() - if (err != nil) { - t.Fatalf("InitializeLocusMetadataVariables failed: " + err.Error()) - } - - monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() - traits.InitializeTraitVariables() - - numberOfGenomesToAdd := helpers.GetRandomIntWithinRange(2, 5) - - genomesList := make([]prepareRawGenomes.RawGenomeWithMetadata, 0, numberOfGenomesToAdd) - - for i:=0; i < numberOfGenomesToAdd; i++{ - - genomeIdentifier, err := helpers.GetNewRandom16ByteArray() - if (err != nil) { - t.Fatalf("Failed to get random 16 byte 
array: " + err.Error()) - } - - getFakeRawGenome := func()(string, error){ - - is23andMe := helpers.GetRandomBool() - if (is23andMe == true){ - fakeRawGenome, _, _, _, err := createRawGenomes.CreateFakeRawGenome_23andMe() - if (err != nil) { - return "", errors.New("Failed to create fake raw 23andMe genome: " + err.Error()) - } - - return fakeRawGenome, nil - } - - fakeRawGenome, _, _, _, err := createRawGenomes.CreateFakeRawGenome_AncestryDNA() - if (err != nil) { - return "", errors.New("Failed to create fake raw AncestryDNA genome: " + err.Error()) - } - - return fakeRawGenome, nil - } - - fakeRawGenome, err := getFakeRawGenome() - if (err != nil){ - t.Fatalf("Failed to get fake raw genome: " + err.Error()) - } - - genomeIsValid, rawGenomeWithMetadata, err := prepareRawGenomes.CreateRawGenomeWithMetadataObject(genomeIdentifier, fakeRawGenome) - if (err != nil){ - t.Fatalf("CreateRawGenomeWithMetadataObject failed: " + err.Error()) - } - if (genomeIsValid == false){ - t.Fatalf("CreateRawGenomeWithMetadataObject failed: Genome is not valid.") - } - - genomesList = append(genomesList, rawGenomeWithMetadata) - } - - updateProgressFunction := func(_ int)error{ - return nil - } - - checkIfProcessIsStoppedFunction := func()bool{ - return false - } - - processCompleted, personGeneticAnalysis, err := createGeneticAnalysis.CreatePersonGeneticAnalysis(genomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) - if (err != nil){ - t.Fatalf("Failed to create person genetic analysis: " + err.Error()) - } - if (processCompleted == false){ - t.Fatalf("Failed to create person genetic analysis: Process did not complete.") - } - - personGeneticAnalysisObject, err := readGeneticAnalysis.ReadPersonGeneticAnalysisString(personGeneticAnalysis) - if (err != nil){ - t.Fatalf("Failed to read person genetic analysis string: " + err.Error()) - } - - err = readGeneticAnalysis.VerifyPersonGeneticAnalysis(personGeneticAnalysisObject) - if (err != nil){ - t.Fatalf("Failed to read 
person genetic analysis: " + err.Error()) - } -} - func TestCreateCoupleGeneticAnalysis_SingleGenomes(t *testing.T){ @@ -215,7 +71,7 @@ func TestCreateCoupleGeneticAnalysis_SingleGenomes(t *testing.T){ return false } - processCompleted, coupleGeneticAnalysis, err := createGeneticAnalysis.CreateCoupleGeneticAnalysis(person1GenomesList, person2GenomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) + processCompleted, coupleGeneticAnalysis, err := createCoupleGeneticAnalysis.CreateCoupleGeneticAnalysis(person1GenomesList, person2GenomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) if (err != nil){ t.Fatalf("Failed to create couple genetic analysis: " + err.Error()) } @@ -313,7 +169,7 @@ func TestCreateCoupleGeneticAnalysis_SingleAndMultipleGenomes(t *testing.T){ return false } - processCompleted, coupleGeneticAnalysis, err := createGeneticAnalysis.CreateCoupleGeneticAnalysis(person1GenomesList, person2GenomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) + processCompleted, coupleGeneticAnalysis, err := createCoupleGeneticAnalysis.CreateCoupleGeneticAnalysis(person1GenomesList, person2GenomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) if (err != nil){ t.Fatalf("Failed to create couple genetic analysis: " + err.Error()) } @@ -406,7 +262,7 @@ func TestCreateCoupleGeneticAnalysis_MultipleGenomes(t *testing.T){ return false } - processCompleted, coupleGeneticAnalysis, err := createGeneticAnalysis.CreateCoupleGeneticAnalysis(person1GenomesList, person2GenomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) + processCompleted, coupleGeneticAnalysis, err := createCoupleGeneticAnalysis.CreateCoupleGeneticAnalysis(person1GenomesList, person2GenomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) if (err != nil){ t.Fatalf("Failed to create couple genetic analysis: " + err.Error()) } @@ -425,3 +281,4 @@ func TestCreateCoupleGeneticAnalysis_MultipleGenomes(t *testing.T){ } } + diff 
--git a/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis.go b/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis.go new file mode 100644 index 0000000..9c30f8c --- /dev/null +++ b/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis.go @@ -0,0 +1,1157 @@ + +// createPersonGeneticAnalysis provides functions to create a Person genetic analysis +// These analyses are performed on one or more genome files. +// They contain 3 categories of results: Monogenic Diseases, Polygenic Diseases and Traits +// Use createCoupleGeneticAnalysis.go to create Couple genetic analyses + +package createPersonGeneticAnalysis + +// Disclaimer: I am a novice in the ways of genetics. This package could be flawed in numerous ways. + +// TODO: We want to eventually use neural nets for both trait and polygenic disease analysis (see geneticPrediction.go) +// These will be trained on a set of genomes and will output a probability analysis for each trait/disease +// This is only possible once we get access to the necessary training data + +// TODO: Add the ability to weight different genome files based on their reliability. +// Some files are much more accurate because they record each location many times. 
+ +import "seekia/resources/geneticReferences/locusMetadata" +import "seekia/resources/geneticReferences/monogenicDiseases" +import "seekia/resources/geneticReferences/polygenicDiseases" +import "seekia/resources/geneticReferences/traits" + +import "seekia/internal/encoding" +import "seekia/internal/genetics/geneticAnalysis" +import "seekia/internal/genetics/locusValue" +import "seekia/internal/genetics/prepareRawGenomes" +import "seekia/internal/helpers" + +import "errors" +import "slices" +import "maps" + + +//Outputs: +// -bool: Process completed (it was not stopped manually before completion) +// -string: New Genetic analysis string (Encoded in MessagePack) +// -error +func CreatePersonGeneticAnalysis(genomesList []prepareRawGenomes.RawGenomeWithMetadata, updatePercentageCompleteFunction func(int)error, checkIfProcessIsStopped func()bool)(bool, string, error){ + + prepareRawGenomesUpdatePercentageCompleteFunction := func(newPercentage int)error{ + + newPercentageCompletion, err := helpers.ScaleNumberProportionally(true, newPercentage, 0, 100, 0, 50) + if (err != nil){ return err } + + err = updatePercentageCompleteFunction(newPercentageCompletion) + if (err != nil) { return err } + + return nil + } + + genomesWithMetadataList, allRawGenomeIdentifiersList, multipleGenomesExist, onlyExcludeConflictsGenomeIdentifier, onlyIncludeSharedGenomeIdentifier, err := prepareRawGenomes.GetGenomesWithMetadataListFromRawGenomesList(genomesList, prepareRawGenomesUpdatePercentageCompleteFunction) + if (err != nil) { return false, "", err } + + newGeneticAnalysisObject := geneticAnalysis.PersonAnalysis{ + AnalysisVersion: 1, + CombinedGenomesExist: multipleGenomesExist, + AllRawGenomeIdentifiersList: allRawGenomeIdentifiersList, + } + + if (multipleGenomesExist == true){ + + newGeneticAnalysisObject.OnlyExcludeConflictsGenomeIdentifier = onlyExcludeConflictsGenomeIdentifier + newGeneticAnalysisObject.OnlyIncludeSharedGenomeIdentifier = onlyIncludeSharedGenomeIdentifier + } + + 
processIsStopped := checkIfProcessIsStopped() + if (processIsStopped == true){ + return false, "", nil + } + + monogenicDiseasesList, err := monogenicDiseases.GetMonogenicDiseaseObjectsList() + if (err != nil) { return false, "", err } + + // Map Structure: Disease Name -> PersonMonogenicDiseaseInfo + analysisMonogenicDiseasesMap := make(map[string]geneticAnalysis.PersonMonogenicDiseaseInfo) + + for _, monogenicDiseaseObject := range monogenicDiseasesList{ + + diseaseName := monogenicDiseaseObject.DiseaseName + + personDiseaseAnalysisObject, err := GetPersonMonogenicDiseaseAnalysis(genomesWithMetadataList, monogenicDiseaseObject) + if (err != nil) { return false, "", err } + + analysisMonogenicDiseasesMap[diseaseName] = personDiseaseAnalysisObject + } + + newGeneticAnalysisObject.MonogenicDiseasesMap = analysisMonogenicDiseasesMap + + polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList() + if (err != nil) { return false, "", err } + + // Map Structure: Disease Name -> PersonPolygenicDiseaseInfo + analysisPolygenicDiseasesMap := make(map[string]geneticAnalysis.PersonPolygenicDiseaseInfo) + + for _, diseaseObject := range polygenicDiseaseObjectsList{ + + personDiseaseAnalysisObject, err := GetPersonPolygenicDiseaseAnalysis(genomesWithMetadataList, diseaseObject) + if (err != nil) { return false, "", err } + + diseaseName := diseaseObject.DiseaseName + + analysisPolygenicDiseasesMap[diseaseName] = personDiseaseAnalysisObject + } + + newGeneticAnalysisObject.PolygenicDiseasesMap = analysisPolygenicDiseasesMap + + traitObjectsList, err := traits.GetTraitObjectsList() + if (err != nil) { return false, "", err } + + // Map Structure: Trait Name -> PersonTraitInfo + analysisTraitsMap := make(map[string]geneticAnalysis.PersonTraitInfo) + + for _, traitObject := range traitObjectsList{ + + personTraitAnalysisObject, err := GetPersonTraitAnalysis(genomesWithMetadataList, traitObject) + if (err != nil) { return false, "", err } + + traitName := 
traitObject.TraitName + + analysisTraitsMap[traitName] = personTraitAnalysisObject + } + + newGeneticAnalysisObject.TraitsMap = analysisTraitsMap + + analysisBytes, err := encoding.EncodeMessagePackBytes(newGeneticAnalysisObject) + if (err != nil) { return false, "", err } + + analysisString := string(analysisBytes) + + return true, analysisString, nil +} + + +//Outputs: +// -geneticAnalysis.PersonMonogenicDiseaseInfo: Monogenic disease analysis object +// -error +func GetPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, diseaseObject monogenicDiseases.MonogenicDisease)(geneticAnalysis.PersonMonogenicDiseaseInfo, error){ + + emptyDiseaseInfoObject := geneticAnalysis.PersonMonogenicDiseaseInfo{} + + dominantOrRecessive := diseaseObject.DominantOrRecessive + variantsList := diseaseObject.VariantsList + + // We use this map to keep track of which RSIDs corresponds to each variant + // We also use it to have a map of all variants for the disease + // Map Structure: Variant Identifier -> []rsID + variantRSIDsMap := make(map[[3]byte][]int64) + + // This map stores all rsIDs for this monogenic disease + // These are locations in the disease's gene which, if mutated, are known to cause the disease + // We use this map to avoid duplicate rsIDs, because one rsID can have multiple variants which belong to it + // We also store all alias rsIDs in this map + allRSIDsMap := make(map[int64]struct{}) + + for _, variantObject := range variantsList{ + + variantIdentifierHex := variantObject.VariantIdentifier + + variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex) + if (err != nil) { return emptyDiseaseInfoObject, err } + + variantRSID := variantObject.VariantRSID + + variantRSIDsList := []int64{variantRSID} + + // We add aliases to variantRSIDsList + + anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(variantRSID) + if (err != nil) { return emptyDiseaseInfoObject, err } + if 
(anyAliasesExist == true){ + variantRSIDsList = append(variantRSIDsList, rsidAliasesList...) + } + + variantRSIDsMap[variantIdentifier] = variantRSIDsList + + for _, rsID := range variantRSIDsList{ + allRSIDsMap[rsID] = struct{}{} + } + } + + // Now we create a new map without any rsID aliases + // Each rsID in this map represents a unique locus on the genome + // Each rsID may have aliases, but they are not included in this map + allUniqueRSIDsMap := make(map[int64]struct{}) + + for rsID, _ := range allRSIDsMap{ + + anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(rsID) + if (err != nil) { return emptyDiseaseInfoObject, err } + if (anyAliasesExist == false){ + allUniqueRSIDsMap[rsID] = struct{}{} + continue + } + + // We see if we already added an alias of this rsID to the map + + checkIfAliasAlreadyExists := func()bool{ + + for _, rsIDAlias := range rsidAliasesList{ + _, exists := allUniqueRSIDsMap[rsIDAlias] + if (exists == true){ + return true + } + } + return false + } + + aliasAlreadyExists := checkIfAliasAlreadyExists() + if (aliasAlreadyExists == true){ + // We already added this alias + continue + } + allUniqueRSIDsMap[rsID] = struct{}{} + } + + // Map Structure: Genome Identifier -> PersonGenomeMonogenicDiseaseInfo + monogenicDiseaseInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomeMonogenicDiseaseInfo) + + for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{ + + genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier + genomeMap := genomeWithMetadataObject.GenomeMap + + // This stores all variant info for this genome + // Map Structure: Variant Identifier -> PersonGenomeMonogenicDiseaseVariantInfo + variantsInfoMap := make(map[[3]byte]geneticAnalysis.PersonGenomeMonogenicDiseaseVariantInfo) + + for _, variantObject := range variantsList{ + + variantIdentifierHex := variantObject.VariantIdentifier + + variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex) + if (err != nil) { 
return emptyDiseaseInfoObject, err } + + variantRSID := variantObject.VariantRSID + + basePairValueFound, base1Value, base2Value, locusIsPhased, _, err := GetLocusValueFromGenomeMap(true, genomeMap, variantRSID) + if (err != nil) { return emptyDiseaseInfoObject, err } + if (basePairValueFound == false){ + + // This genome does not contain info for this variant + // We skip it + continue + } + + // This genome has at least 1 variant + + variantDefectiveBase := variantObject.DefectiveBase + + getBaseIsVariantMutationBool := func(inputBase string)bool{ + + if (inputBase == variantDefectiveBase){ + return true + } + // Base could be mutated to a different unhealthy base + // That mutation could be a neutral/healthier change + // We only care about this specific variant + return false + } + + base1IsDefective := getBaseIsVariantMutationBool(base1Value) + base2IsDefective := getBaseIsVariantMutationBool(base2Value) + + newDiseaseVariantInfoObject := geneticAnalysis.PersonGenomeMonogenicDiseaseVariantInfo{ + Base1HasVariant: base1IsDefective, + Base2HasVariant: base2IsDefective, + LocusIsPhased: locusIsPhased, + } + + variantsInfoMap[variantIdentifier] = newDiseaseVariantInfoObject + + //TODO: Add LocusIsPhased to readGeneticAnalysis package + } + + // We are done adding variant information for the genome + // Now we determine probability that user will pass a disease variant to offspring, and if the user has the disease + + numberOfVariantsTested := len(variantsInfoMap) + + if (numberOfVariantsTested == 0){ + // We don't know anything about this genome's disease risk for this disease + // We won't add any information to the map + continue + } + + // This stores the number of loci that were tested + // Each locus can have multiple potential variants + numberOfLociTested := 0 + + // This stores the number of tested loci that are phased + // A higher number means that the results are more potentially more accurate + // It is only more accurate if multiple heterozygous 
variants on seperate loci exist. + numberOfPhasedLoci := 0 + + for rsID, _ := range allUniqueRSIDsMap{ + + locusValueExists, _, _, locusIsPhased, _, err := GetLocusValueFromGenomeMap(true, genomeMap, rsID) + if (err != nil) { return emptyDiseaseInfoObject, err } + if (locusValueExists == false){ + continue + } + + numberOfLociTested += 1 + + if (locusIsPhased == true){ + numberOfPhasedLoci += 1 + } + } + + // Outputs: + // -bool: Person has disease + // -float64: Probability Person will pass a defect (variant) to offspring (0-1) + // -error + getPersonDiseaseInfo := func()(bool, float64, error){ + + // These variables are used to count the number of defective variants that exist on each chromosome + numberOfVariants_Chromosome1 := 0 + numberOfVariants_Chromosome2 := 0 + numberOfVariants_UnknownChromosome := 0 + + // We use this map to keep track of how many mutations exist for each rsID + // This allows us to know if 2 different variant mutations exist for a single rsID + // For example, base1 is a different deleterious mutation than base2 + // If this ever happens, we know that the user has the disease, + // because both copies of the gene locus are defective. + rsidMutationsMap := make(map[int64]int) + + for variantIdentifier, variantInfoObject := range variantsInfoMap{ + + locusIsPhasedStatus := variantInfoObject.LocusIsPhased + + base1HasVariant := variantInfoObject.Base1HasVariant + base2HasVariant := variantInfoObject.Base2HasVariant + + if (base1HasVariant == false && base2HasVariant == false){ + // Neither chromosome contains the variant mutation. + continue + } + + if (base1HasVariant == true && base2HasVariant == true){ + // Both chromosomes contain the same variant mutation. + // Person has the disease. + // Person will definitely pass disease variant to offspring. + return true, 1, nil + } + + // We know that this variant exists on 1 of the bases, but not both. 
+ + variantRSIDsList, exists := variantRSIDsMap[variantIdentifier] + if (exists == false){ + return false, 0, errors.New("variantRSIDsMap missing variantIdentifier.") + } + + for _, rsID := range variantRSIDsList{ + rsidMutationsMap[rsID] += 1 + } + + if (locusIsPhasedStatus == true){ + + if (base1HasVariant == true){ + numberOfVariants_Chromosome1 += 1 + } + if (base2HasVariant == true){ + numberOfVariants_Chromosome2 += 1 + } + } else { + + if (base1HasVariant == true || base2HasVariant == true){ + numberOfVariants_UnknownChromosome += 1 + } + } + } + + totalNumberOfVariants := numberOfVariants_Chromosome1 + numberOfVariants_Chromosome2 + numberOfVariants_UnknownChromosome + + if (totalNumberOfVariants == 0){ + // Person does not have any disease variants. + // They do not have the disease, and have no chance of passing a disease variant + return false, 0, nil + } + + // Now we check to see if there are any loci which have 2 different variants, one for each base + + for _, numberOfMutations := range rsidMutationsMap{ + + if (numberOfMutations >= 2){ + // Person has 2 mutations on the same location + // They must have the disease, and will definitely pass a variant to their offspring + return true, 1, nil + } + } + + // At this point, we know that there are no homozygous variant mutations + // All variant mutations are heterozygous, meaning the other chromosome strand's base is healthy + + //Outputs: + // -bool: Person has disease + getPersonHasDiseaseBool := func()bool{ + + if (dominantOrRecessive == "Dominant"){ + // Only 1 variant is needed for the person to have the disease + // We know they have at least 1 variant + return true + } + + // dominantOrRecessive == "Recessive" + + if (totalNumberOfVariants == 1){ + // There is only 1 variant in total. + // This single variant cannot exist on both chromosomes. 
+ // The person does not have the disease + return false + } + + // We know that there are at least 2 variants + + if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){ + + // We know there is at least 1 variant mutation on each chromosome + // Therefore, the person has the disease + return true + } + + if (numberOfVariants_UnknownChromosome == 0){ + + // We know that variants do not exist on both chromosomes, only on 1. + // Thus, the person does not have the disease + return false + } + + // We know there are at least 2 variants + // We know there is at least 1 variant whose phase is unknown + + // If all mutations are on the same chromosome, the person does not have the disease. + // If at least 1 mutation exists on each chromosome, the person does have the disease. + // Either way, we don't know enough to say if the person has the disease. + // We will report that they do not, because their genome does not conclusively say that they do. + // This is why phased genomes are useful and provide a more accurate reading + // TODO: Explain this to the user in the GUI + // We must explain that unphased genomes will not detect disease sometimes + + return false + } + + personHasDiseaseBool := getPersonHasDiseaseBool() + + // Output: + // -float64: Probability person will pass a disease variant to their offspring (0-1) + getPersonWillPassVariantProbability := func()float64{ + + if (totalNumberOfVariants == 1){ + + // There is only 1 variant on any chromosome + // The probability of the person passing a variant is 50%. + return 0.5 + } + + // We know that there are at least 2 variants + + if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){ + + // We know there is at least 1 variant mutation on each chromosome + // Therefore, the person will definitely pass a variant + return 1 + } + if (numberOfVariants_UnknownChromosome == 0){ + + // We know that variants do not exist on both chromosomes, only on 1. 
+ // Thus, the person has a 50% probability of passing a variant + return 0.5 + } + + // We know all variants are heterozygous + + // From Wikipeia: + // The human genome contains somewhere between 19,000 and 20,000 protein-coding genes. + // These genes contain an average of 10 introns and the average size of an intron is about 6 kb (6,000 base pairs) + // This means that the average size of a protein-coding gene is about 62 kb (62,000 base pairs) + + // The probability of a recombination breakpoint occurring within the gene is very small + // If there is 1 breakpoint every 100 million locations, on average, and each gene is 62,000 base pairs long, + // then the probability of a breakpoint occurring within a gene is 62,000/100,000,000 = 0.00062 = .062% + // Thus, we disregard the risk of a breakpoint occurring within a gene + // I also read somewhere that breakpoints are less likely to occurr within genes, which makes this likelihood even smaller + + // At this point, we know there at at least 2 variants + // We know that at least 1 of the variants has an unknown phase + // We don't know if all of the variants belong to the same chromosome + // If variants exist on both chromosomes, then the probability of passing a variant is 100% + // If all variants exist on the same chromosome, then the probability of passing a variant is 50% + // We know there is at least a 50% chance of passing a variant, and possibly higher + + return 0.5 + } + + personWillPassVariantProbability := getPersonWillPassVariantProbability() + + return personHasDiseaseBool, personWillPassVariantProbability, nil + } + + personHasDisease, probabilityPersonWillPassAnyVariant, err := getPersonDiseaseInfo() + if (err != nil) { return emptyDiseaseInfoObject, err } + + percentageProbabilityPersonWillPassADiseaseVariant := int(probabilityPersonWillPassAnyVariant * 100) + + diseaseAnalysisObject := geneticAnalysis.PersonGenomeMonogenicDiseaseInfo{ + PersonHasDisease: personHasDisease, + 
NumberOfVariantsTested: numberOfVariantsTested, + NumberOfLociTested: numberOfLociTested, + NumberOfPhasedLoci: numberOfPhasedLoci, + ProbabilityOfPassingADiseaseVariant: percentageProbabilityPersonWillPassADiseaseVariant, + VariantsInfoMap: variantsInfoMap, + } + + monogenicDiseaseInfoMap[genomeIdentifier] = diseaseAnalysisObject + } + + personMonogenicDiseaseInfoObject := geneticAnalysis.PersonMonogenicDiseaseInfo{ + MonogenicDiseaseInfoMap: monogenicDiseaseInfoMap, + } + + if (len(monogenicDiseaseInfoMap) <= 1){ + // We do not need to check for conflicts, there is only <=1 genome with disease information + // Nothing left to do. Analysis is complete. + return personMonogenicDiseaseInfoObject, nil + } + + // We check for conflicts + + getConflictExistsBool := func()(bool, error){ + + firstItemReached := false + + personHasDisease := false + probabilityOfPassingAVariant := 0 + + for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{ + + currentGenomePersonHasDisease := currentGenomeDiseaseAnalysisObject.PersonHasDisease + currentGenomeProbabilityOfPassingAVariant := currentGenomeDiseaseAnalysisObject.ProbabilityOfPassingADiseaseVariant + + if (firstItemReached == false){ + personHasDisease = currentGenomePersonHasDisease + probabilityOfPassingAVariant = currentGenomeProbabilityOfPassingAVariant + firstItemReached = true + continue + } + + if (currentGenomePersonHasDisease != personHasDisease){ + return true, nil + } + if (currentGenomeProbabilityOfPassingAVariant != probabilityOfPassingAVariant){ + return true, nil + } + } + + // Now we test variants for conflicts + // We are only doing this to see if there are variants which one genome has and another doesn't + // For example, the analysis results say that you have a 50% chance of passing a variant for both genomes, but + // they have detected a different variant for each genome. 
+ // This means that your real risk of passing a variant may actually be higher, and you are more likely to have the disease too + + for variantIdentifier, _ := range variantRSIDsMap{ + + // Each variant base pair is either true/false, true/true, false/false, false/true + + // Two different genomes have true/false and false/true, it will not count as a conflict + // If the locus is unphased, then there is no difference between true/false and false/true + // If the locus is phased, then this flip is only meaningful if it effects the probability of disease/passing a variant + // We already checked those probabilities for conflicts earlier + // Therefore, any flip is not considered a conflict + // We only care about conflicts where 1 genome says you have a variant and the other says you don't, or + // one says you have only 1 mutation and the other says you have 2 at that location + + firstItemReached := false + + base1HasVariant := false + base2HasVariant := false + + for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{ + + variantsInfoMap := currentGenomeDiseaseAnalysisObject.VariantsInfoMap + + variantInfoObject, exists := variantsInfoMap[variantIdentifier] + if (exists == false){ + if (firstItemReached == true){ + // A previous genome has information for this variant, and the current one does not + return true, nil + } + continue + } + + currentBase1HasVariant := variantInfoObject.Base1HasVariant + currentBase2HasVariant := variantInfoObject.Base2HasVariant + + if (firstItemReached == false){ + base1HasVariant = currentBase1HasVariant + base2HasVariant = currentBase2HasVariant + firstItemReached = true + continue + } + + if (base1HasVariant == currentBase1HasVariant && base2HasVariant == currentBase2HasVariant){ + // No conflict exists + continue + } + if (base1HasVariant == currentBase2HasVariant && base2HasVariant == currentBase1HasVariant){ + // We don't count this as a conflict + continue + } + + // A conflict exists + return true, nil + 
} + } + + return false, nil + } + + conflictExists, err := getConflictExistsBool() + if (err != nil) { return emptyDiseaseInfoObject, err } + + personMonogenicDiseaseInfoObject.ConflictExists = conflictExists + + return personMonogenicDiseaseInfoObject, nil +} + + +//Outputs: +// -bool: Any loci tested +// -int: Person genome risk score (value between 0-10) +// -int: Person Genome Number of loci tested +// -map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo: Person disease locus info map +// Map Structure: Locus Identifier -> PersonGenomePolygenicDiseaseLocusInfo +// -error +func GetPersonGenomePolygenicDiseaseInfo(diseaseLociList []polygenicDiseases.DiseaseLocus, personLocusValuesMap map[int64]locusValue.LocusValue, lookForLocusAliases bool)(bool, int, int, map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo, error){ + + if (len(personLocusValuesMap) == 0){ + return false, 0, 0, nil, nil + } + + // Map Structure: Locus Identifier -> PersonGenomePolygenicDiseaseLocusInfo + genomeLociInfoMap := make(map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo) + + summedDiseaseRiskWeight := 0 + + minimumPossibleRiskWeightSum := 0 + maximumPossibleRiskWeightSum := 0 + + for _, locusObject := range diseaseLociList{ + + locusRSID := locusObject.LocusRSID + locusRiskWeightsMap := locusObject.RiskWeightsMap + locusOddsRatiosMap := locusObject.OddsRatiosMap + locusMinimumWeight := locusObject.MinimumRiskWeight + locusMaximumWeight := locusObject.MaximumRiskWeight + + locusValueFound, locusBase1Value, locusBase2Value, _, _, err := GetLocusValueFromGenomeMap(lookForLocusAliases, personLocusValuesMap, locusRSID) + if (err != nil) { return false, 0, 0, nil, err } + if (locusValueFound == false){ + continue + } + + locusRiskWeight, locusOddsRatioIsKnown, locusOddsRatio, err := GetGenomePolygenicDiseaseLocusRiskInfo(locusRiskWeightsMap, locusOddsRatiosMap, locusBase1Value, locusBase2Value) + if (err != nil) { return false, 0, 0, nil, err } + + 
newLocusInfoObject := geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo{ + RiskWeight: locusRiskWeight, + OddsRatioIsKnown: locusOddsRatioIsKnown, + } + + if (locusOddsRatioIsKnown == true){ + newLocusInfoObject.OddsRatio = locusOddsRatio + } + + locusIdentifierHex := locusObject.LocusIdentifier + + locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex) + if (err != nil) { return false, 0, 0, nil, err } + + genomeLociInfoMap[locusIdentifier] = newLocusInfoObject + + minimumPossibleRiskWeightSum += locusMinimumWeight + maximumPossibleRiskWeightSum += locusMaximumWeight + + summedDiseaseRiskWeight += locusRiskWeight + } + + numberOfLociTested := len(genomeLociInfoMap) + if (numberOfLociTested == 0){ + // We have no information about this disease for this genome + return false, 0, 0, nil, nil + } + + diseaseRiskScore, err := helpers.ScaleNumberProportionally(true, summedDiseaseRiskWeight, minimumPossibleRiskWeightSum, maximumPossibleRiskWeightSum, 0, 10) + if (err != nil) { return false, 0, 0, nil, err } + + return true, diseaseRiskScore, numberOfLociTested, genomeLociInfoMap, nil +} + + +//Outputs: +// -geneticAnalysis.PersonPolygenicDiseaseInfo +// -error +func GetPersonPolygenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, diseaseObject polygenicDiseases.PolygenicDisease)(geneticAnalysis.PersonPolygenicDiseaseInfo, error){ + + // We use this when returning errors + emptyDiseaseInfoObject := geneticAnalysis.PersonPolygenicDiseaseInfo{} + + diseaseLociList := diseaseObject.LociList + + // This map stores the polygenic disease for each of the person's genomes + // Map Structure: Genome Identifier -> PersonGenomePolygenicDiseaseInfo + personPolygenicDiseaseInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomePolygenicDiseaseInfo) + + // We construct polygenic disease probability info for each genome + + for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{ + + genomeIdentifier := 
genomeWithMetadataObject.GenomeIdentifier + genomeMap := genomeWithMetadataObject.GenomeMap + + // This map stores the loci for this disease and does not contain loci which do not belong to this disease + // Map Structure: rsID -> Locus Value + genomeLocusValuesMap := make(map[int64]locusValue.LocusValue) + + for _, locusObject := range diseaseLociList{ + + locusRSID := locusObject.LocusRSID + + locusValueFound, _, _, _, locusValueObject, err := GetLocusValueFromGenomeMap(true, genomeMap, locusRSID) + if (err != nil) { return emptyDiseaseInfoObject, err } + if (locusValueFound == false){ + continue + } + + genomeLocusValuesMap[locusRSID] = locusValueObject + } + + anyLociTested, personDiseaseRiskScore, genomeNumberOfLociTested, genomeLociInfoMap, err := GetPersonGenomePolygenicDiseaseInfo(diseaseLociList, genomeLocusValuesMap, false) + if (err != nil) { return emptyDiseaseInfoObject, err } + if (anyLociTested == false){ + continue + } + + newDiseaseInfoObject := geneticAnalysis.PersonGenomePolygenicDiseaseInfo{ + NumberOfLociTested: genomeNumberOfLociTested, + RiskScore: personDiseaseRiskScore, + LocusValuesMap: genomeLocusValuesMap, + LociInfoMap: genomeLociInfoMap, + } + + personPolygenicDiseaseInfoMap[genomeIdentifier] = newDiseaseInfoObject + } + + newPersonPolygenicDiseaseInfoObject := geneticAnalysis.PersonPolygenicDiseaseInfo{ + PolygenicDiseaseInfoMap: personPolygenicDiseaseInfoMap, + } + + if (len(personPolygenicDiseaseInfoMap) <= 1){ + // We do not need to check for conflicts, there is only <=1 genome with disease information + // Nothing left to do. Analysis is complete. 
+ return newPersonPolygenicDiseaseInfoObject, nil + } + + // We check for conflicts between the different genome's results + + getConflictExistsBool := func()(bool, error){ + + // First we check to see if any of the genomes have different risk scores or NumberOfLociTested + + genomeRiskScore := 0 + genomeNumberOfLociTested := 0 + + firstItemReached := false + + for _, personGenomeDiseaseInfoObject := range personPolygenicDiseaseInfoMap{ + + currentGenomeRiskScore := personGenomeDiseaseInfoObject.RiskScore + currentGenomeNumberOfLociTested := personGenomeDiseaseInfoObject.NumberOfLociTested + + if (firstItemReached == false){ + genomeRiskScore = currentGenomeRiskScore + genomeNumberOfLociTested = currentGenomeNumberOfLociTested + firstItemReached = true + continue + } + + if (genomeRiskScore != currentGenomeRiskScore){ + return true, nil + } + if (genomeNumberOfLociTested != currentGenomeNumberOfLociTested){ + return true, nil + } + } + + // Now we check for conflicts between the different locus values + // We consider a conflict any time the same locus has different weights/odds ratios + // We don't care if the loci have different base pair values, so long as those base pairs have the same risk weights/odds ratios + + for _, locusObject := range diseaseLociList{ + + locusIdentifierHex := locusObject.LocusIdentifier + + locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex) + if (err != nil) { return false, err } + + locusRiskWeight := 0 + locusOddsRatio := float64(0) + + firstItemReached := false + + for _, personGenomeDiseaseInfoObject := range personPolygenicDiseaseInfoMap{ + + genomeLociInfoMap := personGenomeDiseaseInfoObject.LociInfoMap + + genomeLocusObject, exists := genomeLociInfoMap[locusIdentifier] + if (exists == false){ + if (firstItemReached == true){ + // A previous genome has information for this locus, and the current one does not + return true, nil + } + continue + } + + genomeLocusRiskWeight := 
genomeLocusObject.RiskWeight + genomeLocusOddsRatio := genomeLocusObject.OddsRatio + + if (firstItemReached == false){ + locusRiskWeight = genomeLocusRiskWeight + locusOddsRatio = genomeLocusOddsRatio + firstItemReached = true + continue + } + if (locusRiskWeight == genomeLocusRiskWeight && locusOddsRatio == genomeLocusOddsRatio){ + // No conflict exists for this locus on the genomes we have already checked + continue + } + + // Conflict exists + return true, nil + } + } + + return false, nil + } + + conflictExists, err := getConflictExistsBool() + if (err != nil) { return emptyDiseaseInfoObject, err } + + newPersonPolygenicDiseaseInfoObject.ConflictExists = conflictExists + + return newPersonPolygenicDiseaseInfoObject, nil +} + + +//Outputs: +// -geneticAnalysis.PersonTraitInfo: Trait analysis object +// -error +func GetPersonTraitAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, traitObject traits.Trait)(geneticAnalysis.PersonTraitInfo, error){ + + // We use this when returning errors + emptyPersonTraitInfo := geneticAnalysis.PersonTraitInfo{} + + traitLociList := traitObject.LociList + traitRulesList := traitObject.RulesList + + // Map Structure: Genome Identifier -> PersonGenomeTraitInfo + newPersonTraitInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomeTraitInfo) + + for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{ + + genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier + genomeMap := genomeWithMetadataObject.GenomeMap + + // This map contains the locus values for the genome + // If an locus's entry doesn't exist, its value is unknown + // Map Structure: Locus rsID -> Locus Value + genomeLocusValuesMap := make(map[int64]locusValue.LocusValue) + + for _, locusRSID := range traitLociList{ + + locusBasePairKnown, _, _, _, locusValueObject, err := GetLocusValueFromGenomeMap(true, genomeMap, locusRSID) + if (err != nil) { return emptyPersonTraitInfo, err } + if (locusBasePairKnown == false){ + 
continue + } + + genomeLocusValuesMap[locusRSID] = locusValueObject + } + + // This map contains the trait outcome scores for the genome + // Map Structure: Outcome Name -> Score + // Example: "Intolerant" -> 5 + traitOutcomeScoresMap := make(map[string]int) + + // Map Structure: Rule Identifier -> Genome Passes rule (true if the genome passes the rule) + personPassesRulesMap := make(map[[3]byte]bool) + + if (len(traitRulesList) != 0){ + + // At least 1 rule exists for this trait + + for _, ruleObject := range traitRulesList{ + + ruleIdentifierHex := ruleObject.RuleIdentifier + + ruleIdentifier, err := encoding.DecodeHexStringTo3ByteArray(ruleIdentifierHex) + if (err != nil) { return emptyPersonTraitInfo, err } + + ruleLociList := ruleObject.LociList + + genomePassesRuleIsKnown, genomePassesRule, err := GetGenomePassesTraitRuleStatus(ruleLociList, genomeMap, false) + if (err != nil) { return emptyPersonTraitInfo, err } + if (genomePassesRuleIsKnown == false){ + continue + } + + personPassesRulesMap[ruleIdentifier] = genomePassesRule + + // The rule has been passed by this genome + // We add the outcome points for the rule to the traitOutcomeScoresMap + + ruleOutcomePointsMap := ruleObject.OutcomePointsMap + + for traitOutcome, pointsChange := range ruleOutcomePointsMap{ + + traitOutcomeScoresMap[traitOutcome] += pointsChange + } + } + } + + traitOutcomesList := traitObject.OutcomesList + + // We add all outcomes for which there were no points + + for _, traitOutcome := range traitOutcomesList{ + + _, exists := traitOutcomeScoresMap[traitOutcome] + if (exists == false){ + traitOutcomeScoresMap[traitOutcome] = 0 + } + } + + numberOfRulesTested := len(personPassesRulesMap) + + newPersonGenomeTraitInfo := geneticAnalysis.PersonGenomeTraitInfo{ + NumberOfRulesTested: numberOfRulesTested, + LocusValuesMap: genomeLocusValuesMap, + OutcomeScoresMap: traitOutcomeScoresMap, + GenomePassesRulesMap: personPassesRulesMap, + } + + newPersonTraitInfoMap[genomeIdentifier] = 
newPersonGenomeTraitInfo + } + + newPersonTraitInfoObject := geneticAnalysis.PersonTraitInfo{ + TraitInfoMap: newPersonTraitInfoMap, + } + + if (len(newPersonTraitInfoMap) <= 1){ + // We do not need to check for conflicts, there is only <=1 genome with trait information + // Nothing left to do. Analysis is complete. + return newPersonTraitInfoObject, nil + } + + // We check for conflicts + + getConflictExistsBool := func()(bool, error){ + + //TODO: Check for locus value conflicts once locus values are used in neural network prediction. + + if (len(traitRulesList) == 0){ + return false, nil + } + + // We check to see if the outcome scores are the same for all genomes + // We also check each rule result + + firstItemReached := false + + outcomeScoresMap := make(map[string]int) + passesRulesMap := make(map[[3]byte]bool) + + for _, genomeTraitInfoObject := range newPersonTraitInfoMap{ + + currentGenomeOutcomeScoresMap := genomeTraitInfoObject.OutcomeScoresMap + currentGenomePassesRulesMap := genomeTraitInfoObject.GenomePassesRulesMap + + if (firstItemReached == false){ + outcomeScoresMap = currentGenomeOutcomeScoresMap + passesRulesMap = currentGenomePassesRulesMap + firstItemReached = true + continue + } + + areEqual := maps.Equal(currentGenomeOutcomeScoresMap, outcomeScoresMap) + if (areEqual == false){ + // A conflict exists + return true, nil + } + areEqual = maps.Equal(currentGenomePassesRulesMap, passesRulesMap) + if (areEqual == false){ + // A conflict exists + return true, nil + } + } + + return false, nil + } + + conflictExists, err := getConflictExistsBool() + if (err != nil) { return emptyPersonTraitInfo, err } + + newPersonTraitInfoObject.ConflictExists = conflictExists + + return newPersonTraitInfoObject, nil +} + + +//Outputs: +// -int: Base pair disease locus risk weight +// -bool: Base pair disease locus odds ratio known +// -float64: Base pair disease locus odds ratio +// -error +func GetGenomePolygenicDiseaseLocusRiskInfo(locusRiskWeightsMap 
map[string]int, locusOddsRatiosMap map[string]float64, locusBase1Value string, locusBase2Value string)(int, bool, float64, error){ + + locusBasePairJoined := locusBase1Value + ";" + locusBase2Value + + riskWeight, exists := locusRiskWeightsMap[locusBasePairJoined] + if (exists == false){ + // This is an unknown base combination + // We will treat it as a 0 risk weight + return 0, true, 1, nil + } + + if (riskWeight == 0){ + return 0, true, 1, nil + } + + oddsRatio, exists := locusOddsRatiosMap[locusBasePairJoined] + if (exists == false){ + return riskWeight, false, 0, nil + } + + return riskWeight, true, oddsRatio, nil +} + +// This function checks to see if a genome will pass a trait rule +// Outputs: +// -bool: Genome passes trait rule status is known +// -bool: Genome passes trait rule +// -error +func GetGenomePassesTraitRuleStatus(ruleLociList []traits.RuleLocus, genomeMap map[int64]locusValue.LocusValue, checkForAliases bool)(bool, bool, error){ + + // We check to see if genome passes all rule loci + // To pass a rule, all of the rule's loci must be passed by the provided genome + // We consider a rule Known if the genome either passes all loci, or fails to pass 1 locus + // We consider a rule Unknown if any loci are unknown, and there are no rules which are known not to be passed + + anyLocusIsUnknown := false + + for _, locusObject := range ruleLociList{ + + locusRSID := locusObject.LocusRSID + + locusBasePairKnown, locusBase1, locusBase2, _, _, err := GetLocusValueFromGenomeMap(checkForAliases, genomeMap, locusRSID) + if (err != nil) { return false, false, err } + if (locusBasePairKnown == false){ + anyLocusIsUnknown = true + // We keep searching to see if any of the rule's loci are known to not pass + continue + } + + locusBasePairJoined := locusBase1 + ";" + locusBase2 + + locusBasePairsList := locusObject.BasePairsList + + genomePassesRuleLocus := slices.Contains(locusBasePairsList, locusBasePairJoined) + if (genomePassesRuleLocus == false){ + // The 
genome has failed to pass a single rule locus, thus, the rule is not passed + return true, false, nil + } + } + + if (anyLocusIsUnknown == true){ + // The rule is not passed, but it's status is unknown + // There were no rules which were known not to pass + return false, false, nil + } + + // All rules were passed + + return true, true, nil +} + + +// This function will retrieve the base pair of the locus from the input genome map +// We use this function because each rsID has aliases, so we must sometimes check those aliases to find locus values +// +// Outputs: +// -bool: Valid base pair value found +// -string: Base 1 Value (Nucleotide base for the SNP) +// -string: Base 2 Value (Nucleotide base for the SNP) +// -bool: Locus base pair is phased +// -locusValue.LocusValue +// -error +func GetLocusValueFromGenomeMap(checkForAliases bool, inputGenomeMap map[int64]locusValue.LocusValue, locusRSID int64)(bool, string, string, bool, locusValue.LocusValue, error){ + + // Outputs: + // -bool: Locus value found + // -locusValue.LocusValue + // -error + getLocusValue := func()(bool, locusValue.LocusValue, error){ + + currentLocusValue, exists := inputGenomeMap[locusRSID] + if (exists == true){ + return true, currentLocusValue, nil + } + + if (checkForAliases == false){ + return false, locusValue.LocusValue{}, nil + } + + // We check for aliases + + anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(locusRSID) + if (err != nil) { return false, locusValue.LocusValue{}, err } + if (anyAliasesExist == false){ + return false, locusValue.LocusValue{}, nil + } + + for _, rsidAlias := range rsidAliasesList{ + + currentLocusValue, exists := inputGenomeMap[rsidAlias] + if (exists == true){ + return true, currentLocusValue, nil + } + } + + return false, locusValue.LocusValue{}, nil + } + + locusValueFound, locusValueObject, err := getLocusValue() + if (err != nil) { return false, "", "", false, locusValue.LocusValue{}, err } + if (locusValueFound == false){ + 
return false, "", "", false, locusValue.LocusValue{}, nil + } + + base1Value := locusValueObject.Base1Value + base2Value := locusValueObject.Base2Value + locusIsPhased := locusValueObject.LocusIsPhased + + return true, base1Value, base2Value, locusIsPhased, locusValueObject, nil +} + + diff --git a/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go b/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go new file mode 100644 index 0000000..1947515 --- /dev/null +++ b/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go @@ -0,0 +1,164 @@ +package createPersonGeneticAnalysis_test + +import "seekia/internal/genetics/createPersonGeneticAnalysis" + +import "seekia/internal/genetics/readGeneticAnalysis" + +import "seekia/resources/geneticReferences/locusMetadata" +import "seekia/resources/geneticReferences/monogenicDiseases" +import "seekia/resources/geneticReferences/polygenicDiseases" +import "seekia/resources/geneticReferences/traits" + +import "seekia/internal/genetics/createRawGenomes" +import "seekia/internal/genetics/prepareRawGenomes" +import "seekia/internal/helpers" + +import "testing" +import "errors" + + +func TestCreatePersonGeneticAnalysis_SingleGenome(t *testing.T){ + + err := locusMetadata.InitializeLocusMetadataVariables() + if (err != nil) { + t.Fatalf("InitializeLocusMetadataVariables failed: " + err.Error()) + } + + monogenicDiseases.InitializeMonogenicDiseaseVariables() + polygenicDiseases.InitializePolygenicDiseaseVariables() + traits.InitializeTraitVariables() + + genomeIdentifier, err := helpers.GetNewRandom16ByteArray() + if (err != nil) { + t.Fatalf("Failed to get random 16 byte array: " + err.Error()) + } + + fakeRawGenome, _, _, _, err := createRawGenomes.CreateFakeRawGenome_AncestryDNA() + if (err != nil) { + t.Fatalf("Failed to create fake raw AncestryDNA genome: " + err.Error()) + } + + genomeIsValid, rawGenomeWithMetadata, err := 
prepareRawGenomes.CreateRawGenomeWithMetadataObject(genomeIdentifier, fakeRawGenome) + if (err != nil){ + t.Fatalf("CreateRawGenomeWithMetadataObject failed: " + err.Error()) + } + if (genomeIsValid == false){ + t.Fatalf("CreateRawGenomeWithMetadataObject failed: Genome is not valid.") + } + + genomesList := []prepareRawGenomes.RawGenomeWithMetadata{rawGenomeWithMetadata} + + updateProgressFunction := func(_ int)error{ + return nil + } + + checkIfProcessIsStoppedFunction := func()bool{ + return false + } + + processCompleted, personGeneticAnalysis, err := createPersonGeneticAnalysis.CreatePersonGeneticAnalysis(genomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) + if (err != nil){ + t.Fatalf("Failed to create person genetic analysis: " + err.Error()) + } + if (processCompleted == false){ + t.Fatalf("Failed to create person genetic analysis: Process did not complete.") + } + + personGeneticAnalysisObject, err := readGeneticAnalysis.ReadPersonGeneticAnalysisString(personGeneticAnalysis) + if (err != nil){ + t.Fatalf("Failed to read person genetic analysis string: " + err.Error()) + } + + err = readGeneticAnalysis.VerifyPersonGeneticAnalysis(personGeneticAnalysisObject) + if (err != nil){ + t.Fatalf("Failed to read person genetic analysis: " + err.Error()) + } +} + + +func TestCreatePersonGeneticAnalysis_MultipleGenomes(t *testing.T){ + + err := locusMetadata.InitializeLocusMetadataVariables() + if (err != nil) { + t.Fatalf("InitializeLocusMetadataVariables failed: " + err.Error()) + } + + monogenicDiseases.InitializeMonogenicDiseaseVariables() + polygenicDiseases.InitializePolygenicDiseaseVariables() + traits.InitializeTraitVariables() + + numberOfGenomesToAdd := helpers.GetRandomIntWithinRange(2, 5) + + genomesList := make([]prepareRawGenomes.RawGenomeWithMetadata, 0, numberOfGenomesToAdd) + + for i:=0; i < numberOfGenomesToAdd; i++{ + + genomeIdentifier, err := helpers.GetNewRandom16ByteArray() + if (err != nil) { + t.Fatalf("Failed to get random 
16 byte array: " + err.Error()) + } + + getFakeRawGenome := func()(string, error){ + + is23andMe := helpers.GetRandomBool() + if (is23andMe == true){ + fakeRawGenome, _, _, _, err := createRawGenomes.CreateFakeRawGenome_23andMe() + if (err != nil) { + return "", errors.New("Failed to create fake raw 23andMe genome: " + err.Error()) + } + + return fakeRawGenome, nil + } + + fakeRawGenome, _, _, _, err := createRawGenomes.CreateFakeRawGenome_AncestryDNA() + if (err != nil) { + return "", errors.New("Failed to create fake raw AncestryDNA genome: " + err.Error()) + } + + return fakeRawGenome, nil + } + + fakeRawGenome, err := getFakeRawGenome() + if (err != nil){ + t.Fatalf("Failed to get fake raw genome: " + err.Error()) + } + + genomeIsValid, rawGenomeWithMetadata, err := prepareRawGenomes.CreateRawGenomeWithMetadataObject(genomeIdentifier, fakeRawGenome) + if (err != nil){ + t.Fatalf("CreateRawGenomeWithMetadataObject failed: " + err.Error()) + } + if (genomeIsValid == false){ + t.Fatalf("CreateRawGenomeWithMetadataObject failed: Genome is not valid.") + } + + genomesList = append(genomesList, rawGenomeWithMetadata) + } + + updateProgressFunction := func(_ int)error{ + return nil + } + + checkIfProcessIsStoppedFunction := func()bool{ + return false + } + + processCompleted, personGeneticAnalysis, err := createPersonGeneticAnalysis.CreatePersonGeneticAnalysis(genomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) + if (err != nil){ + t.Fatalf("Failed to create person genetic analysis: " + err.Error()) + } + if (processCompleted == false){ + t.Fatalf("Failed to create person genetic analysis: Process did not complete.") + } + + personGeneticAnalysisObject, err := readGeneticAnalysis.ReadPersonGeneticAnalysisString(personGeneticAnalysis) + if (err != nil){ + t.Fatalf("Failed to read person genetic analysis string: " + err.Error()) + } + + err = readGeneticAnalysis.VerifyPersonGeneticAnalysis(personGeneticAnalysisObject) + if (err != nil){ + 
t.Fatalf("Failed to read person genetic analysis: " + err.Error()) + } +} + + diff --git a/internal/genetics/createRawGenomes/createRawGenomes.go b/internal/genetics/createRawGenomes/createRawGenomes.go index 35cd430..c7502b9 100644 --- a/internal/genetics/createRawGenomes/createRawGenomes.go +++ b/internal/genetics/createRawGenomes/createRawGenomes.go @@ -1,6 +1,6 @@ // createRawGenomes provides functions to create fake raw genome files -// This package's functions are only used to test the readRawGenomes and createGeneticAnalysis packages. +// This package's functions are only used to test the readRawGenomes, createPersonGeneticAnalysis, and createCoupleGeneticAnalysis packages. package createRawGenomes diff --git a/internal/genetics/myAnalyses/myAnalyses.go b/internal/genetics/myAnalyses/myAnalyses.go index 098ae26..8750a8c 100644 --- a/internal/genetics/myAnalyses/myAnalyses.go +++ b/internal/genetics/myAnalyses/myAnalyses.go @@ -1,6 +1,6 @@ // myAnalyses provides functions to manage genome analyses for People and Couples -// Analyses are created using the createGeneticAnalysis package +// Analyses are created using the createPersonGeneticAnalysis and createCoupleGeneticAnalysis packages package myAnalyses @@ -10,7 +10,8 @@ import "seekia/internal/encoding" import "seekia/internal/helpers" import "seekia/internal/localFilesystem" import "seekia/internal/myDatastores/myMapList" -import "seekia/internal/genetics/createGeneticAnalysis" +import "seekia/internal/genetics/createCoupleGeneticAnalysis" +import "seekia/internal/genetics/createPersonGeneticAnalysis" import "seekia/internal/genetics/geneticAnalysis" import "seekia/internal/genetics/prepareRawGenomes" import "seekia/internal/genetics/readGeneticAnalysis" @@ -673,7 +674,7 @@ func StartCreateNewPersonGeneticAnalysis(personIdentifier string)(string, error) return nil } - processCompleted, newGeneticAnalysisString, err := createGeneticAnalysis.CreatePersonGeneticAnalysis(genomesList, 
analysisUpdatePercentageCompleteFunction, checkIfProcessIsStopped) + processCompleted, newGeneticAnalysisString, err := createPersonGeneticAnalysis.CreatePersonGeneticAnalysis(genomesList, analysisUpdatePercentageCompleteFunction, checkIfProcessIsStopped) if (err != nil) { return err } if (processCompleted == false){ // User stopped the analysis mid-way @@ -986,7 +987,7 @@ func StartCreateNewCoupleGeneticAnalysis(inputPerson1Identifier string, inputPer return nil } - processCompleted, newGeneticAnalysisString, err := createGeneticAnalysis.CreateCoupleGeneticAnalysis(person1GenomesList, person2GenomesList, updateCoupleAnalysisPercentageCompleteFunction, checkIfProcessIsStopped) + processCompleted, newGeneticAnalysisString, err := createCoupleGeneticAnalysis.CreateCoupleGeneticAnalysis(person1GenomesList, person2GenomesList, updateCoupleAnalysisPercentageCompleteFunction, checkIfProcessIsStopped) if (err != nil) { return err } if (processCompleted == false){ // User stopped the analysis mid-way diff --git a/internal/profiles/calculatedAttributes/calculatedAttributes.go b/internal/profiles/calculatedAttributes/calculatedAttributes.go index a551024..10ceb30 100644 --- a/internal/profiles/calculatedAttributes/calculatedAttributes.go +++ b/internal/profiles/calculatedAttributes/calculatedAttributes.go @@ -18,7 +18,8 @@ import "seekia/internal/desires/myLocalDesires" import "seekia/internal/desires/myMateDesires" import "seekia/internal/encoding" import "seekia/internal/genetics/companyAnalysis" -import "seekia/internal/genetics/createGeneticAnalysis" +import "seekia/internal/genetics/createCoupleGeneticAnalysis" +import "seekia/internal/genetics/createPersonGeneticAnalysis" import "seekia/internal/genetics/locusValue" import "seekia/internal/genetics/myChosenAnalysis" import "seekia/internal/genetics/readGeneticAnalysis" @@ -556,7 +557,7 @@ func GetAnyProfileAttributeIncludingCalculated(attributeName string, getProfileA userProbabilityOfPassingADiseaseVariantInt, err := 
getUserProbabilityOfPassingADiseaseVariantInt() if (err != nil) { return false, 0, "", err } - probabilityOffspringHasDiseaseIsKnown, offspringPercentageProbabilityOfDisease, _, _, err := createGeneticAnalysis.GetOffspringMonogenicDiseaseProbabilities(diseaseIsDominantOrRecessive, myProbabilityIsKnown, myProbabilityOfPassingADiseaseVariant, userProbabilityIsKnown, userProbabilityOfPassingADiseaseVariantInt) + probabilityOffspringHasDiseaseIsKnown, offspringPercentageProbabilityOfDisease, _, _, err := createCoupleGeneticAnalysis.GetOffspringMonogenicDiseaseProbabilities(diseaseIsDominantOrRecessive, myProbabilityIsKnown, myProbabilityOfPassingADiseaseVariant, userProbabilityIsKnown, userProbabilityOfPassingADiseaseVariantInt) if (err != nil) { return false, 0, "", err } if (probabilityOffspringHasDiseaseIsKnown == false){ @@ -712,7 +713,7 @@ func GetAnyProfileAttributeIncludingCalculated(attributeName string, getProfileA userDiseaseLocusValuesMap[locusRSID] = userLocusValue } - anyLocusTested, userDiseaseRiskScore, _, _, err := createGeneticAnalysis.GetPersonGenomePolygenicDiseaseInfo(diseaseLociList, userDiseaseLocusValuesMap, true) + anyLocusTested, userDiseaseRiskScore, _, _, err := createPersonGeneticAnalysis.GetPersonGenomePolygenicDiseaseInfo(diseaseLociList, userDiseaseLocusValuesMap, true) if (err != nil) { return false, 0, "", err } if (anyLocusTested == false){ continue @@ -817,7 +818,7 @@ func GetAnyProfileAttributeIncludingCalculated(attributeName string, getProfileA userDiseaseLocusValuesMap[locusRSID] = newLocusValue } - anyLocusValuesTested, offspringAverageRiskScore, _, err := createGeneticAnalysis.GetOffspringPolygenicDiseaseInfo_Fast(diseaseLociList, myDiseaseLocusValuesMap, userDiseaseLocusValuesMap) + anyLocusValuesTested, offspringAverageRiskScore, _, err := createCoupleGeneticAnalysis.GetOffspringPolygenicDiseaseInfo_Fast(diseaseLociList, myDiseaseLocusValuesMap, userDiseaseLocusValuesMap) if (err != nil) { return false, 0, "", err } if 
(anyLocusValuesTested == false){ continue diff --git a/resources/geneticReferences/traits/traits.go b/resources/geneticReferences/traits/traits.go index 3259188..f555eec 100644 --- a/resources/geneticReferences/traits/traits.go +++ b/resources/geneticReferences/traits/traits.go @@ -7,7 +7,7 @@ package traits // These will be trained on a set of genomes and will output a probability analysis for each trait // This is only possible once we get access to the necessary training data // -// See createGeneticAnalysis.go for an explanation of how offspring trait prediction could work with neural nets +// See geneticPrediction.go for a non-working attempt to predict traits with neural nets import "errors" diff --git a/utilities/createSampleGeneticAnalyses/createSampleGeneticAnalyses.go b/utilities/createSampleGeneticAnalyses/createSampleGeneticAnalyses.go index 5bbe846..08e2f58 100644 --- a/utilities/createSampleGeneticAnalyses/createSampleGeneticAnalyses.go +++ b/utilities/createSampleGeneticAnalyses/createSampleGeneticAnalyses.go @@ -14,7 +14,8 @@ import "seekia/resources/geneticReferences/traits" import "seekia/internal/encoding" import "seekia/internal/localFilesystem" -import "seekia/internal/genetics/createGeneticAnalysis" +import "seekia/internal/genetics/createCoupleGeneticAnalysis" +import "seekia/internal/genetics/createPersonGeneticAnalysis" import "seekia/internal/genetics/prepareRawGenomes" import "errors" @@ -67,7 +68,7 @@ func main(){ return false } - processCompleted, personGeneticAnalysis, err := createGeneticAnalysis.CreatePersonGeneticAnalysis(personGenomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) + processCompleted, personGeneticAnalysis, err := createPersonGeneticAnalysis.CreatePersonGeneticAnalysis(personGenomesList, updateProgressFunction, checkIfProcessIsStoppedFunction) if (err != nil){ return false, nil, "", errors.New("Failed to create person genetic analysis: " + err.Error()) } @@ -108,7 +109,7 @@ func main(){ return false } 
- processCompleted, coupleGeneticAnalysis, err := createGeneticAnalysis.CreateCoupleGeneticAnalysis(person1GenomeList, person2GenomeList, updateProgressFunction, checkIfProcessIsStoppedFunction) + processCompleted, coupleGeneticAnalysis, err := createCoupleGeneticAnalysis.CreateCoupleGeneticAnalysis(person1GenomeList, person2GenomeList, updateProgressFunction, checkIfProcessIsStoppedFunction) if (err != nil){ log.Println("Failed to create couple genetic analysis: " + err.Error()) return