// createPersonGeneticAnalysis provides functions to create a Person genetic analysis // These analyses are performed on one or more genome files. // They contain 3 categories of results: Monogenic Diseases, Polygenic Diseases and Traits // Use createCoupleGeneticAnalysis.go to create Couple genetic analyses package createPersonGeneticAnalysis // Disclaimer: I am a novice in the ways of genetics. This package could be flawed in numerous ways. // TODO: We want to eventually use neural nets for both trait and polygenic disease analysis (see geneticPrediction.go) // These will be trained on a set of genomes and will output a probability analysis for each trait/disease // This is only possible once we get access to the necessary training data // TODO: Add the ability to weight different genome files based on their reliability. // Some files are much more accurate because they record each location many times. import "seekia/resources/geneticReferences/locusMetadata" import "seekia/resources/geneticReferences/monogenicDiseases" import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" import "seekia/internal/encoding" import "seekia/internal/genetics/geneticAnalysis" import "seekia/internal/genetics/geneticPrediction" import "seekia/internal/genetics/locusValue" import "seekia/internal/genetics/prepareRawGenomes" import "seekia/internal/helpers" import "errors" import "slices" import "reflect" //Outputs: // -bool: Process completed (it was not stopped manually before completion) // -string: New Genetic analysis string (Encoded in MessagePack) // -error func CreatePersonGeneticAnalysis(genomesList []prepareRawGenomes.RawGenomeWithMetadata, updatePercentageCompleteFunction func(int)error, checkIfProcessIsStopped func()bool)(bool, string, error){ prepareRawGenomesUpdatePercentageCompleteFunction := func(newPercentage int)error{ newPercentageCompletion, err := helpers.ScaleIntProportionally(true, newPercentage, 0, 100, 0, 50) if (err != nil){ return err } err = updatePercentageCompleteFunction(newPercentageCompletion) if (err != nil) { return err } return nil } anyUsefulLocationsExist, genomesWithMetadataList, allRawGenomeIdentifiersList, multipleGenomesExist, onlyExcludeConflictsGenomeIdentifier, onlyIncludeSharedGenomeIdentifier, err := prepareRawGenomes.GetGenomesWithMetadataListFromRawGenomesList(genomesList, prepareRawGenomesUpdatePercentageCompleteFunction) if (err != nil) { return false, "", err } if (anyUsefulLocationsExist == false){ // We should have checked for this when genomes were first imported. return false, "", errors.New("CreatePersonGeneticAnalysis called with genomeList containing no genomes with useful locations.") } if (len(genomesList) > 1 && (len(genomesList) != (len(genomesWithMetadataList)-2)) ){ // If there is more than 1 genome, 2 combined genomes are created // We are checking to make sure that none of the input genomes were dropped due to not having any locations // We should have checked to make sure each input genome has useful locations when each genome was first imported. return false, "", errors.New("CreatePersonGeneticAnalysis called with genomeList containing at least 1 genome without useful locations.") } // This map stores each genome's locus values // Map Structure: Genome Identifier -> Genome locus values map (rsID -> Locus Value) genomesMap := make(map[[16]byte]map[int64]locusValue.LocusValue) for _, genomeWithMetadata := range genomesWithMetadataList{ genomeIdentifier := genomeWithMetadata.GenomeIdentifier genomeMap := genomeWithMetadata.GenomeMap genomesMap[genomeIdentifier] = genomeMap } newGeneticAnalysisObject := geneticAnalysis.PersonAnalysis{ AnalysisVersion: 1, CombinedGenomesExist: multipleGenomesExist, AllRawGenomeIdentifiersList: allRawGenomeIdentifiersList, GenomesMap: genomesMap, } if (multipleGenomesExist == true){ newGeneticAnalysisObject.OnlyExcludeConflictsGenomeIdentifier = onlyExcludeConflictsGenomeIdentifier newGeneticAnalysisObject.OnlyIncludeSharedGenomeIdentifier = onlyIncludeSharedGenomeIdentifier } processIsStopped := checkIfProcessIsStopped() if (processIsStopped == true){ return false, "", nil } monogenicDiseasesList, err := monogenicDiseases.GetMonogenicDiseaseObjectsList() if (err != nil) { return false, "", err } // Map Structure: Disease Name -> PersonMonogenicDiseaseInfo analysisMonogenicDiseasesMap := make(map[string]geneticAnalysis.PersonMonogenicDiseaseInfo) for _, monogenicDiseaseObject := range monogenicDiseasesList{ diseaseName := monogenicDiseaseObject.DiseaseName personDiseaseAnalysisObject, err := GetPersonMonogenicDiseaseAnalysis(genomesWithMetadataList, monogenicDiseaseObject) if (err != nil) { return false, "", err } analysisMonogenicDiseasesMap[diseaseName] = personDiseaseAnalysisObject } newGeneticAnalysisObject.MonogenicDiseasesMap = analysisMonogenicDiseasesMap polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList() if (err != nil) { return false, "", err } // Map Structure: Disease Name -> PersonPolygenicDiseaseInfo analysisPolygenicDiseasesMap := make(map[string]geneticAnalysis.PersonPolygenicDiseaseInfo) for _, diseaseObject := range polygenicDiseaseObjectsList{ personDiseaseAnalysisObject, err := GetPersonPolygenicDiseaseAnalysis(genomesWithMetadataList, diseaseObject) if (err != nil) { return false, "", err } diseaseName := diseaseObject.DiseaseName analysisPolygenicDiseasesMap[diseaseName] = personDiseaseAnalysisObject } newGeneticAnalysisObject.PolygenicDiseasesMap = analysisPolygenicDiseasesMap traitObjectsList, err := traits.GetTraitObjectsList() if (err != nil) { return false, "", err } // This map will always contain an entry for each discrete trait // Map Structure: Trait Name -> PersonDiscreteTraitInfo analysisDiscreteTraitsMap := make(map[string]geneticAnalysis.PersonDiscreteTraitInfo) // This map will not contain entries for traits which this person's genome has no known loci // Map Structure: Trait Name -> PersonNumericTraitInfo analysisNumericTraitsMap := make(map[string]geneticAnalysis.PersonNumericTraitInfo) for _, traitObject := range traitObjectsList{ traitName := traitObject.TraitName traitIsDiscreteOrNumeric := traitObject.DiscreteOrNumeric if (traitIsDiscreteOrNumeric == "Discrete"){ personTraitAnalysisObject, err := GetPersonDiscreteTraitAnalysis(genomesWithMetadataList, traitObject) if (err != nil) { return false, "", err } analysisDiscreteTraitsMap[traitName] = personTraitAnalysisObject } } newGeneticAnalysisObject.DiscreteTraitsMap = analysisDiscreteTraitsMap newGeneticAnalysisObject.NumericTraitsMap = analysisNumericTraitsMap analysisBytes, err := encoding.EncodeMessagePackBytes(newGeneticAnalysisObject) if (err != nil) { return false, "", err } analysisString := string(analysisBytes) return true, analysisString, nil } //Outputs: // -geneticAnalysis.PersonMonogenicDiseaseInfo: Monogenic disease analysis object // -error func GetPersonMonogenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, diseaseObject monogenicDiseases.MonogenicDisease)(geneticAnalysis.PersonMonogenicDiseaseInfo, error){ emptyDiseaseInfoObject := geneticAnalysis.PersonMonogenicDiseaseInfo{} dominantOrRecessive := diseaseObject.DominantOrRecessive variantsList := diseaseObject.VariantsList // We use this map to keep track of which RSIDs corresponds to each variant // We also use it to have a map of all variants for the disease // Map Structure: Variant Identifier -> []rsID variantRSIDsMap := make(map[[3]byte][]int64) // This map stores all rsIDs for this monogenic disease // These are locations in the disease's gene which, if mutated, are known to cause the disease // We use this map to avoid duplicate rsIDs, because one rsID can have multiple variants which belong to it // We also store all alias rsIDs in this map allRSIDsMap := make(map[int64]struct{}) for _, variantObject := range variantsList{ variantIdentifierHex := variantObject.VariantIdentifier variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex) if (err != nil) { return emptyDiseaseInfoObject, err } variantRSID := variantObject.VariantRSID variantRSIDsList := []int64{variantRSID} // We add aliases to variantRSIDsList anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(variantRSID) if (err != nil) { return emptyDiseaseInfoObject, err } if (anyAliasesExist == true){ variantRSIDsList = append(variantRSIDsList, rsidAliasesList...) } variantRSIDsMap[variantIdentifier] = variantRSIDsList for _, rsID := range variantRSIDsList{ allRSIDsMap[rsID] = struct{}{} } } // Now we create a new map without any rsID aliases // Each rsID in this map represents a unique locus on the genome // Each rsID may have aliases, but they are not included in this map allUniqueRSIDsMap := make(map[int64]struct{}) for rsID, _ := range allRSIDsMap{ anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(rsID) if (err != nil) { return emptyDiseaseInfoObject, err } if (anyAliasesExist == false){ allUniqueRSIDsMap[rsID] = struct{}{} continue } // We see if we already added an alias of this rsID to the map checkIfAliasAlreadyExists := func()bool{ for _, rsIDAlias := range rsidAliasesList{ _, exists := allUniqueRSIDsMap[rsIDAlias] if (exists == true){ return true } } return false } aliasAlreadyExists := checkIfAliasAlreadyExists() if (aliasAlreadyExists == true){ // We already added this alias continue } allUniqueRSIDsMap[rsID] = struct{}{} } // Map Structure: Genome Identifier -> PersonGenomeMonogenicDiseaseInfo monogenicDiseaseInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomeMonogenicDiseaseInfo) for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{ genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier genomeMap := genomeWithMetadataObject.GenomeMap // This stores all variant info for this genome // Map Structure: Variant Identifier -> PersonGenomeMonogenicDiseaseVariantInfo variantsInfoMap := make(map[[3]byte]geneticAnalysis.PersonGenomeMonogenicDiseaseVariantInfo) for _, variantObject := range variantsList{ variantIdentifierHex := variantObject.VariantIdentifier variantIdentifier, err := encoding.DecodeHexStringTo3ByteArray(variantIdentifierHex) if (err != nil) { return emptyDiseaseInfoObject, err } variantRSID := variantObject.VariantRSID basePairValueFound, base1Value, base2Value, locusIsPhased, _, err := GetLocusValueFromGenomeMap(true, genomeMap, variantRSID) if (err != nil) { return emptyDiseaseInfoObject, err } if (basePairValueFound == false){ // This genome does not contain info for this variant // We skip it continue } // This genome has at least 1 variant variantDefectiveBase := variantObject.DefectiveBase getBaseIsVariantMutationBool := func(inputBase string)bool{ if (inputBase == variantDefectiveBase){ return true } // Base could be mutated to a different unhealthy base // That mutation could be a neutral/healthier change // We only care about this specific variant return false } base1IsDefective := getBaseIsVariantMutationBool(base1Value) base2IsDefective := getBaseIsVariantMutationBool(base2Value) newDiseaseVariantInfoObject := geneticAnalysis.PersonGenomeMonogenicDiseaseVariantInfo{ Base1HasVariant: base1IsDefective, Base2HasVariant: base2IsDefective, LocusIsPhased: locusIsPhased, } variantsInfoMap[variantIdentifier] = newDiseaseVariantInfoObject //TODO: Add LocusIsPhased to readGeneticAnalysis package } // We are done adding variant information for the genome // Now we determine probability that user will pass a disease variant to offspring, and if the user has the disease numberOfVariantsTested := len(variantsInfoMap) if (numberOfVariantsTested == 0){ // We don't know anything about this genome's disease risk for this disease // We won't add any information to the map continue } // This stores the number of loci that were tested // Each locus can have multiple potential variants numberOfLociTested := 0 // This stores the number of tested loci that are phased // A higher number means that the results are more potentially more accurate // It is only more accurate if multiple heterozygous variants on seperate loci exist. numberOfPhasedLoci := 0 for rsID, _ := range allUniqueRSIDsMap{ locusValueExists, _, _, locusIsPhased, _, err := GetLocusValueFromGenomeMap(true, genomeMap, rsID) if (err != nil) { return emptyDiseaseInfoObject, err } if (locusValueExists == false){ continue } numberOfLociTested += 1 if (locusIsPhased == true){ numberOfPhasedLoci += 1 } } // Outputs: // -bool: Person has disease // -float64: Probability Person will pass a defect (variant) to offspring (0-1) // -error getPersonDiseaseInfo := func()(bool, float64, error){ // These variables are used to count the number of defective variants that exist on each chromosome numberOfVariants_Chromosome1 := 0 numberOfVariants_Chromosome2 := 0 numberOfVariants_UnknownChromosome := 0 // We use this map to keep track of how many mutations exist for each rsID // This allows us to know if 2 different variant mutations exist for a single rsID // For example, base1 is a different deleterious mutation than base2 // If this ever happens, we know that the user has the disease, // because both copies of the gene locus are defective. rsidMutationsMap := make(map[int64]int) for variantIdentifier, variantInfoObject := range variantsInfoMap{ locusIsPhasedStatus := variantInfoObject.LocusIsPhased base1HasVariant := variantInfoObject.Base1HasVariant base2HasVariant := variantInfoObject.Base2HasVariant if (base1HasVariant == false && base2HasVariant == false){ // Neither chromosome contains the variant mutation. continue } if (base1HasVariant == true && base2HasVariant == true){ // Both chromosomes contain the same variant mutation. // Person has the disease. // Person will definitely pass disease variant to offspring. return true, 1, nil } // We know that this variant exists on 1 of the bases, but not both. variantRSIDsList, exists := variantRSIDsMap[variantIdentifier] if (exists == false){ return false, 0, errors.New("variantRSIDsMap missing variantIdentifier.") } for _, rsID := range variantRSIDsList{ rsidMutationsMap[rsID] += 1 } if (locusIsPhasedStatus == true){ if (base1HasVariant == true){ numberOfVariants_Chromosome1 += 1 } if (base2HasVariant == true){ numberOfVariants_Chromosome2 += 1 } } else { if (base1HasVariant == true || base2HasVariant == true){ numberOfVariants_UnknownChromosome += 1 } } } totalNumberOfVariants := numberOfVariants_Chromosome1 + numberOfVariants_Chromosome2 + numberOfVariants_UnknownChromosome if (totalNumberOfVariants == 0){ // Person does not have any disease variants. // They do not have the disease, and have no chance of passing a disease variant return false, 0, nil } // Now we check to see if there are any loci which have 2 different variants, one for each base for _, numberOfMutations := range rsidMutationsMap{ if (numberOfMutations >= 2){ // Person has 2 mutations on the same location // They must have the disease, and will definitely pass a variant to their offspring return true, 1, nil } } // At this point, we know that there are no homozygous variant mutations // All variant mutations are heterozygous, meaning the other chromosome strand's base is healthy //Outputs: // -bool: Person has disease getPersonHasDiseaseBool := func()bool{ if (dominantOrRecessive == "Dominant"){ // Only 1 variant is needed for the person to have the disease // We know they have at least 1 variant return true } // dominantOrRecessive == "Recessive" if (totalNumberOfVariants == 1){ // There is only 1 variant in total. // This single variant cannot exist on both chromosomes. // The person does not have the disease return false } // We know that there are at least 2 variants if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){ // We know there is at least 1 variant mutation on each chromosome // Therefore, the person has the disease return true } if (numberOfVariants_UnknownChromosome == 0){ // We know that variants do not exist on both chromosomes, only on 1. // Thus, the person does not have the disease return false } // We know there are at least 2 variants // We know there is at least 1 variant whose phase is unknown // If all mutations are on the same chromosome, the person does not have the disease. // If at least 1 mutation exists on each chromosome, the person does have the disease. // Either way, we don't know enough to say if the person has the disease. // We will report that they do not, because their genome does not conclusively say that they do. // This is why phased genomes are useful and provide a more accurate reading // TODO: Explain this to the user in the GUI // We must explain that unphased genomes will not detect disease sometimes return false } personHasDiseaseBool := getPersonHasDiseaseBool() // Output: // -float64: Probability person will pass a disease variant to their offspring (0-1) getPersonWillPassVariantProbability := func()float64{ if (totalNumberOfVariants == 1){ // There is only 1 variant on any chromosome // The probability of the person passing a variant is 50%. return 0.5 } // We know that there are at least 2 variants if (numberOfVariants_Chromosome1 >= 1 && numberOfVariants_Chromosome2 >= 1){ // We know there is at least 1 variant mutation on each chromosome // Therefore, the person will definitely pass a variant return 1 } if (numberOfVariants_UnknownChromosome == 0){ // We know that variants do not exist on both chromosomes, only on 1. // Thus, the person has a 50% probability of passing a variant return 0.5 } // We know all variants are heterozygous // From Wikipeia: // The human genome contains somewhere between 19,000 and 20,000 protein-coding genes. // These genes contain an average of 10 introns and the average size of an intron is about 6 kb (6,000 base pairs) // This means that the average size of a protein-coding gene is about 62 kb (62,000 base pairs) // The probability of a recombination breakpoint occurring within the gene is very small // If there is 1 breakpoint every 100 million locations, on average, and each gene is 62,000 base pairs long, // then the probability of a breakpoint occurring within a gene is 62,000/100,000,000 = 0.00062 = .062% // Thus, we disregard the risk of a breakpoint occurring within a gene // I also read somewhere that breakpoints are less likely to occurr within genes, which makes this likelihood even smaller // At this point, we know there at at least 2 variants // We know that at least 1 of the variants has an unknown phase // We don't know if all of the variants belong to the same chromosome // If variants exist on both chromosomes, then the probability of passing a variant is 100% // If all variants exist on the same chromosome, then the probability of passing a variant is 50% // We know there is at least a 50% chance of passing a variant, and possibly higher return 0.5 } personWillPassVariantProbability := getPersonWillPassVariantProbability() return personHasDiseaseBool, personWillPassVariantProbability, nil } personHasDisease, probabilityPersonWillPassAnyVariant, err := getPersonDiseaseInfo() if (err != nil) { return emptyDiseaseInfoObject, err } percentageProbabilityPersonWillPassADiseaseVariant := int(probabilityPersonWillPassAnyVariant * 100) diseaseAnalysisObject := geneticAnalysis.PersonGenomeMonogenicDiseaseInfo{ PersonHasDisease: personHasDisease, QuantityOfVariantsTested: numberOfVariantsTested, QuantityOfLociTested: numberOfLociTested, QuantityOfPhasedLoci: numberOfPhasedLoci, ProbabilityOfPassingADiseaseVariant: percentageProbabilityPersonWillPassADiseaseVariant, VariantsInfoMap: variantsInfoMap, } monogenicDiseaseInfoMap[genomeIdentifier] = diseaseAnalysisObject } personMonogenicDiseaseInfoObject := geneticAnalysis.PersonMonogenicDiseaseInfo{ MonogenicDiseaseInfoMap: monogenicDiseaseInfoMap, } if (len(monogenicDiseaseInfoMap) <= 1){ // We do not need to check for conflicts, there is only <=1 genome with disease information // Nothing left to do. Analysis is complete. return personMonogenicDiseaseInfoObject, nil } // We check for conflicts getConflictExistsBool := func()(bool, error){ firstItemReached := false personHasDisease := false probabilityOfPassingAVariant := 0 for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{ currentGenomePersonHasDisease := currentGenomeDiseaseAnalysisObject.PersonHasDisease currentGenomeProbabilityOfPassingAVariant := currentGenomeDiseaseAnalysisObject.ProbabilityOfPassingADiseaseVariant if (firstItemReached == false){ personHasDisease = currentGenomePersonHasDisease probabilityOfPassingAVariant = currentGenomeProbabilityOfPassingAVariant firstItemReached = true continue } if (currentGenomePersonHasDisease != personHasDisease){ return true, nil } if (currentGenomeProbabilityOfPassingAVariant != probabilityOfPassingAVariant){ return true, nil } } // Now we test variants for conflicts // We are only doing this to see if there are variants which one genome has and another doesn't // For example, the analysis results say that you have a 50% chance of passing a variant for both genomes, but // they have detected a different variant for each genome. // This means that your real risk of passing a variant may actually be higher, and you are more likely to have the disease too for variantIdentifier, _ := range variantRSIDsMap{ // Each variant base pair is either true/false, true/true, false/false, false/true // Two different genomes have true/false and false/true, it will not count as a conflict // If the locus is unphased, then there is no difference between true/false and false/true // If the locus is phased, then this flip is only meaningful if it effects the probability of disease/passing a variant // We already checked those probabilities for conflicts earlier // Therefore, any flip is not considered a conflict // We only care about conflicts where 1 genome says you have a variant and the other says you don't, or // one says you have only 1 mutation and the other says you have 2 at that location firstItemReached := false base1HasVariant := false base2HasVariant := false for _, currentGenomeDiseaseAnalysisObject := range monogenicDiseaseInfoMap{ variantsInfoMap := currentGenomeDiseaseAnalysisObject.VariantsInfoMap variantInfoObject, exists := variantsInfoMap[variantIdentifier] if (exists == false){ if (firstItemReached == true){ // A previous genome has information for this variant, and the current one does not return true, nil } continue } currentBase1HasVariant := variantInfoObject.Base1HasVariant currentBase2HasVariant := variantInfoObject.Base2HasVariant if (firstItemReached == false){ base1HasVariant = currentBase1HasVariant base2HasVariant = currentBase2HasVariant firstItemReached = true continue } if (base1HasVariant == currentBase1HasVariant && base2HasVariant == currentBase2HasVariant){ // No conflict exists continue } if (base1HasVariant == currentBase2HasVariant && base2HasVariant == currentBase1HasVariant){ // We don't count this as a conflict continue } // A conflict exists return true, nil } } return false, nil } conflictExists, err := getConflictExistsBool() if (err != nil) { return emptyDiseaseInfoObject, err } personMonogenicDiseaseInfoObject.ConflictExists = conflictExists return personMonogenicDiseaseInfoObject, nil } //Outputs: // -bool: Any loci tested // -int: Person genome risk score (value between 0-10) // -int: Person Genome Number of loci tested // -map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo: Person disease locus info map // Map Structure: Locus Identifier -> PersonGenomePolygenicDiseaseLocusInfo // -error func GetPersonGenomePolygenicDiseaseInfo(diseaseLociList []polygenicDiseases.DiseaseLocus, personLocusValuesMap map[int64]locusValue.LocusValue, lookForLocusAliases bool)(bool, int, int, map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo, error){ if (len(personLocusValuesMap) == 0){ return false, 0, 0, nil, nil } // Map Structure: Locus Identifier -> PersonGenomePolygenicDiseaseLocusInfo genomeLociInfoMap := make(map[[3]byte]geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo) summedDiseaseRiskWeight := 0 minimumPossibleRiskWeightSum := 0 maximumPossibleRiskWeightSum := 0 for _, locusObject := range diseaseLociList{ locusRSID := locusObject.LocusRSID locusRiskWeightsMap := locusObject.RiskWeightsMap locusOddsRatiosMap := locusObject.OddsRatiosMap locusMinimumWeight := locusObject.MinimumRiskWeight locusMaximumWeight := locusObject.MaximumRiskWeight locusValueFound, locusBase1Value, locusBase2Value, _, _, err := GetLocusValueFromGenomeMap(lookForLocusAliases, personLocusValuesMap, locusRSID) if (err != nil) { return false, 0, 0, nil, err } if (locusValueFound == false){ continue } locusRiskWeight, locusOddsRatioIsKnown, locusOddsRatio, err := GetGenomePolygenicDiseaseLocusRiskInfo(locusRiskWeightsMap, locusOddsRatiosMap, locusBase1Value, locusBase2Value) if (err != nil) { return false, 0, 0, nil, err } newLocusInfoObject := geneticAnalysis.PersonGenomePolygenicDiseaseLocusInfo{ RiskWeight: locusRiskWeight, OddsRatioIsKnown: locusOddsRatioIsKnown, } if (locusOddsRatioIsKnown == true){ newLocusInfoObject.OddsRatio = locusOddsRatio } locusIdentifierHex := locusObject.LocusIdentifier locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex) if (err != nil) { return false, 0, 0, nil, err } genomeLociInfoMap[locusIdentifier] = newLocusInfoObject minimumPossibleRiskWeightSum += locusMinimumWeight maximumPossibleRiskWeightSum += locusMaximumWeight summedDiseaseRiskWeight += locusRiskWeight } numberOfLociTested := len(genomeLociInfoMap) if (numberOfLociTested == 0){ // We have no information about this disease for this genome return false, 0, 0, nil, nil } diseaseRiskScore, err := helpers.ScaleIntProportionally(true, summedDiseaseRiskWeight, minimumPossibleRiskWeightSum, maximumPossibleRiskWeightSum, 0, 10) if (err != nil) { return false, 0, 0, nil, err } return true, diseaseRiskScore, numberOfLociTested, genomeLociInfoMap, nil } //Outputs: // -geneticAnalysis.PersonPolygenicDiseaseInfo // -error func GetPersonPolygenicDiseaseAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, diseaseObject polygenicDiseases.PolygenicDisease)(geneticAnalysis.PersonPolygenicDiseaseInfo, error){ // We use this when returning errors emptyDiseaseInfoObject := geneticAnalysis.PersonPolygenicDiseaseInfo{} diseaseLociList := diseaseObject.LociList // This map stores the polygenic disease for each of the person's genomes // Map Structure: Genome Identifier -> PersonGenomePolygenicDiseaseInfo personPolygenicDiseaseInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomePolygenicDiseaseInfo) // We construct polygenic disease probability info for each genome for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{ genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier genomeMap := genomeWithMetadataObject.GenomeMap // This map stores the loci for this disease and does not contain loci which do not belong to this disease // Map Structure: rsID -> Locus Value genomeLocusValuesMap := make(map[int64]locusValue.LocusValue) for _, locusObject := range diseaseLociList{ locusRSID := locusObject.LocusRSID locusValueFound, _, _, _, locusValueObject, err := GetLocusValueFromGenomeMap(true, genomeMap, locusRSID) if (err != nil) { return emptyDiseaseInfoObject, err } if (locusValueFound == false){ continue } genomeLocusValuesMap[locusRSID] = locusValueObject } anyLociTested, personDiseaseRiskScore, genomeNumberOfLociTested, genomeLociInfoMap, err := GetPersonGenomePolygenicDiseaseInfo(diseaseLociList, genomeLocusValuesMap, true) if (err != nil) { return emptyDiseaseInfoObject, err } if (anyLociTested == false){ continue } newDiseaseInfoObject := geneticAnalysis.PersonGenomePolygenicDiseaseInfo{ QuantityOfLociTested: genomeNumberOfLociTested, RiskScore: personDiseaseRiskScore, LociInfoMap: genomeLociInfoMap, } personPolygenicDiseaseInfoMap[genomeIdentifier] = newDiseaseInfoObject } newPersonPolygenicDiseaseInfoObject := geneticAnalysis.PersonPolygenicDiseaseInfo{ PolygenicDiseaseInfoMap: personPolygenicDiseaseInfoMap, } if (len(personPolygenicDiseaseInfoMap) <= 1){ // We do not need to check for conflicts, there is only <=1 genome with disease information // Nothing left to do. Analysis is complete. return newPersonPolygenicDiseaseInfoObject, nil } // We check for conflicts between the different genome's results getConflictExistsBool := func()(bool, error){ // First we check to see if any of the genomes have different risk scores or NumberOfLociTested genomeRiskScore := 0 genomeNumberOfLociTested := 0 firstItemReached := false for _, personGenomeDiseaseInfoObject := range personPolygenicDiseaseInfoMap{ currentGenomeRiskScore := personGenomeDiseaseInfoObject.RiskScore currentGenomeNumberOfLociTested := personGenomeDiseaseInfoObject.QuantityOfLociTested if (firstItemReached == false){ genomeRiskScore = currentGenomeRiskScore genomeNumberOfLociTested = currentGenomeNumberOfLociTested firstItemReached = true continue } if (genomeRiskScore != currentGenomeRiskScore){ return true, nil } if (genomeNumberOfLociTested != currentGenomeNumberOfLociTested){ return true, nil } } // Now we check for conflicts between the different locus values // We consider a conflict any time the same locus has different weights/odds ratios // We don't care if the loci have different base pair values, so long as those base pairs have the same risk weights/odds ratios for _, locusObject := range diseaseLociList{ locusIdentifierHex := locusObject.LocusIdentifier locusIdentifier, err := encoding.DecodeHexStringTo3ByteArray(locusIdentifierHex) if (err != nil) { return false, err } locusRiskWeight := 0 locusOddsRatio := float64(0) firstItemReached := false for _, personGenomeDiseaseInfoObject := range personPolygenicDiseaseInfoMap{ genomeLociInfoMap := personGenomeDiseaseInfoObject.LociInfoMap genomeLocusObject, exists := genomeLociInfoMap[locusIdentifier] if (exists == false){ if (firstItemReached == true){ // A previous genome has information for this locus, and the current one does not return true, nil } continue } genomeLocusRiskWeight := genomeLocusObject.RiskWeight genomeLocusOddsRatio := genomeLocusObject.OddsRatio if (firstItemReached == false){ locusRiskWeight = genomeLocusRiskWeight locusOddsRatio = genomeLocusOddsRatio firstItemReached = true continue } if (locusRiskWeight == genomeLocusRiskWeight && locusOddsRatio == genomeLocusOddsRatio){ // No conflict exists for this locus on the genomes we have already checked continue } // Conflict exists return true, nil } } return false, nil } conflictExists, err := getConflictExistsBool() if (err != nil) { return emptyDiseaseInfoObject, err } newPersonPolygenicDiseaseInfoObject.ConflictExists = conflictExists return newPersonPolygenicDiseaseInfoObject, nil } //Outputs: // -geneticAnalysis.PersonDiscreteTraitInfo: Trait analysis object // -error func GetPersonDiscreteTraitAnalysis(inputGenomesWithMetadataList []prepareRawGenomes.GenomeWithMetadata, traitObject traits.Trait)(geneticAnalysis.PersonDiscreteTraitInfo, error){ // Map Structure: Genome Identifier -> PersonGenomeDiscreteTraitInfo newPersonTraitInfoMap := make(map[[16]byte]geneticAnalysis.PersonGenomeDiscreteTraitInfo) for _, genomeWithMetadataObject := range inputGenomesWithMetadataList{ genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier genomeMap := genomeWithMetadataObject.GenomeMap newPersonGenomeTraitInfo := geneticAnalysis.PersonGenomeDiscreteTraitInfo{} neuralNetworkExists, neuralNetworkOutcomeIsKnown, predictedOutcome, predictionConfidence, quantityOfLociTested, quantityOfPhasedLoci, err := GetGenomeDiscreteTraitAnalysis_NeuralNetwork(traitObject, genomeMap, true) if (err != nil) { return geneticAnalysis.PersonDiscreteTraitInfo{}, err } if (neuralNetworkExists == true){ newPersonGenomeTraitInfo.NeuralNetworkExists = true if (neuralNetworkOutcomeIsKnown == true){ newPersonGenomeTraitInfo.NeuralNetworkAnalysisExists = true newPersonGenomeTraitInfo_NeuralNetwork := geneticAnalysis.PersonGenomeDiscreteTraitInfo_NeuralNetwork{ PredictedOutcome: predictedOutcome, PredictionConfidence: predictionConfidence, QuantityOfLociKnown: quantityOfLociTested, QuantityOfPhasedLoci: quantityOfPhasedLoci, } newPersonGenomeTraitInfo.NeuralNetworkAnalysis = newPersonGenomeTraitInfo_NeuralNetwork } } anyRulesExist, quantityOfRulesTested, quantityOfLociKnown, genomePassesRulesMap, predictedOutcomeExists, predictedOutcome, err := GetGenomeDiscreteTraitAnalysis_Rules(traitObject, genomeMap, true) if (err != nil) { return geneticAnalysis.PersonDiscreteTraitInfo{}, err } if (anyRulesExist == true){ newPersonGenomeTraitInfo.AnyRulesExist = true if (quantityOfRulesTested != 0){ newPersonGenomeTraitInfo.RulesAnalysisExists = true newPersonGenomeTraitInfo_Rules := geneticAnalysis.PersonGenomeDiscreteTraitInfo_Rules{ GenomePassesRulesMap: genomePassesRulesMap, PredictedOutcomeExists: predictedOutcomeExists, PredictedOutcome: predictedOutcome, QuantityOfRulesTested: quantityOfRulesTested, QuantityOfLociKnown: quantityOfLociKnown, } newPersonGenomeTraitInfo.RulesAnalysis = newPersonGenomeTraitInfo_Rules } } newPersonTraitInfoMap[genomeIdentifier] = newPersonGenomeTraitInfo } newPersonTraitInfoObject := geneticAnalysis.PersonDiscreteTraitInfo{ TraitInfoMap: newPersonTraitInfoMap, } if (len(newPersonTraitInfoMap) <= 1){ // We do not need to check for conflicts, there is only <=1 genome with trait information // Nothing left to do. Analysis is complete. return newPersonTraitInfoObject, nil } // We check for conflicts getConflictExistsBool := func()(bool, error){ // We check to see if the analysis results are the same for all genomes firstItemReached := false personGenomeTraitInfoObject := geneticAnalysis.PersonGenomeDiscreteTraitInfo{} for _, genomeTraitInfoObject := range newPersonTraitInfoMap{ if (firstItemReached == false){ personGenomeTraitInfoObject = genomeTraitInfoObject continue } areEqual := reflect.DeepEqual(personGenomeTraitInfoObject, genomeTraitInfoObject) if (areEqual == false){ return true, nil } } return false, nil } conflictExists, err := getConflictExistsBool() if (err != nil) { return geneticAnalysis.PersonDiscreteTraitInfo{}, err } newPersonTraitInfoObject.ConflictExists = conflictExists return newPersonTraitInfoObject, nil } //Outputs: // -int: Base pair disease locus risk weight // -bool: Base pair disease locus odds ratio known // -float64: Base pair disease locus odds ratio // -error func GetGenomePolygenicDiseaseLocusRiskInfo(locusRiskWeightsMap map[string]int, locusOddsRatiosMap map[string]float64, locusBase1Value string, locusBase2Value string)(int, bool, float64, error){ locusBasePairJoined := locusBase1Value + ";" + locusBase2Value riskWeight, exists := locusRiskWeightsMap[locusBasePairJoined] if (exists == false){ // This is an unknown base combination // We will treat it as a 0 risk weight return 0, true, 1, nil } if (riskWeight == 0){ return 0, true, 1, nil } oddsRatio, exists := locusOddsRatiosMap[locusBasePairJoined] if (exists == false){ return riskWeight, false, 0, nil } return riskWeight, true, oddsRatio, nil } // We use this to generate trait predictions using a neural network // The alternative prediction method is to use Rules (see GetGenomeTraitAnalysis_Rules) //Outputs: // -bool: Trait Neural network analysis available (if false, we can't predict this trait using a neural network) // -bool: Neural network outcome is known (at least 1 locus value is known which is needed for the neural network // -string: The predicted outcome (Example: "Blue") // -int: Probability (0-100) that the outcome from neural network is true (confidence) // -int: Quantity of loci tested // -int: Quantity of phased loci // -error func GetGenomeDiscreteTraitAnalysis_NeuralNetwork(traitObject traits.Trait, genomeMap map[int64]locusValue.LocusValue, checkForAliases bool)(bool, bool, string, int, int, int, error){ getGenomeLocusValuesMap := func()(map[int64]locusValue.LocusValue, error){ if (checkForAliases == false){ // We don't need to check for rsID aliases. return genomeMap, nil } traitLociList := traitObject.LociList // This map contains the locus values for the genome // If a locus's entry doesn't exist, its value is unknown // Map Structure: Locus rsID -> Locus Value genomeLocusValuesMap := make(map[int64]locusValue.LocusValue) for _, locusRSID := range traitLociList{ locusBasePairKnown, _, _, _, locusValueObject, err := GetLocusValueFromGenomeMap(checkForAliases, genomeMap, locusRSID) if (err != nil) { return nil, err } if (locusBasePairKnown == false){ continue } genomeLocusValuesMap[locusRSID] = locusValueObject } return genomeLocusValuesMap, nil } genomeLocusValuesMap, err := getGenomeLocusValuesMap() if (err != nil) { return false, false, "", 0, 0, 0, err } traitName := traitObject.TraitName neuralNetworkModelExists, traitPredictionIsPossible, predictedOutcome, predictionConfidence, quantityOfLociKnown, quantityOfPhasedLoci, err := geneticPrediction.GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap(traitName, genomeLocusValuesMap) if (err != nil) { return false, false, "", 0, 0, 0, err } if (neuralNetworkModelExists == false){ return false, false, "", 0, 0, 0, nil } if (traitPredictionIsPossible == false){ return true, false, "", 0, 0, 0, nil } return true, true, predictedOutcome, predictionConfidence, quantityOfLociKnown, quantityOfPhasedLoci, nil } //Outputs: // -bool: Rule-based trait prediction is available // -int: Quantity of trait rules tested // -int: Quantity of loci known // -map[[3]byte]bool: Passed rules map (Rule Identifier -> Genome passes rule) // -bool: Rule-based prediction outcome is known (at least 1 rule has been tested and there is no outcome tie) // -string: The predicted outcome (Example: "Tolerant") // -error func GetGenomeDiscreteTraitAnalysis_Rules(traitObject traits.Trait, genomeMap map[int64]locusValue.LocusValue, checkForAliases bool)(bool, int, int, map[[3]byte]bool, bool, string, error){ traitIsDiscreteOrNumeric := traitObject.DiscreteOrNumeric if (traitIsDiscreteOrNumeric != "Discrete"){ return false, 0, 0, nil, false, "", errors.New("GetGenomeDiscreteTraitAnalysis_Rules called with non-discrete trait.") } traitRulesList := traitObject.RulesList if (len(traitRulesList) == 0){ // We can't predict this trait using rules // This means that neural network prediction is the only alternative potential way to predict this trait return false, 0, 0, nil, false, "", nil } // This map contains the trait outcome scores for the genome // Map Structure: Outcome Name -> Score // Example: "Intolerant" -> 5 traitOutcomeScoresMap := make(map[string]int) // Map Structure: Rule Identifier -> Genome Passes rule (true if the genome passes the rule) personPassesRulesMap := make(map[[3]byte]bool) // This map stores each known loci // Multiple rules can use the same loci, so we need a map to avoid duplicates // Map Structure: Locus RSID -> Locus is known lociAreKnownMap := make(map[int64]bool) for _, ruleObject := range traitRulesList{ ruleIdentifierHex := ruleObject.RuleIdentifier ruleIdentifier, err := encoding.DecodeHexStringTo3ByteArray(ruleIdentifierHex) if (err != nil) { return false, 0, 0, nil, false, "", err } ruleLociList := ruleObject.LociList for _, locusObject := range ruleLociList{ locusRSID := locusObject.LocusRSID _, exists := lociAreKnownMap[locusRSID] if (exists == true){ // We already know if this locus is known continue } locusIsKnown, _, _, _, _, err := GetLocusValueFromGenomeMap(checkForAliases, genomeMap, locusRSID) if (err != nil) { return false, 0, 0, nil, false, "", err } lociAreKnownMap[locusRSID] = locusIsKnown } genomePassesRuleIsKnown, genomePassesRule, err := GetGenomePassesDiscreteTraitRuleStatus(ruleLociList, genomeMap, checkForAliases) if (err != nil) { return false, 0, 0, nil, false, "", err } if (genomePassesRuleIsKnown == false){ continue } personPassesRulesMap[ruleIdentifier] = genomePassesRule // The rule has been passed by this genome // We add the outcome points for the rule to the traitOutcomeScoresMap ruleOutcomePointsMap := ruleObject.OutcomePointsMap for traitOutcome, pointsChange := range ruleOutcomePointsMap{ traitOutcomeScoresMap[traitOutcome] += pointsChange } } quantityOfLociKnown := 0 for _, locusIsKnown := range lociAreKnownMap{ if (locusIsKnown == true){ quantityOfLociKnown += 1 } } quantityOfRulesTested := len(personPassesRulesMap) if (quantityOfRulesTested == 0){ return true, 0, quantityOfLociKnown, personPassesRulesMap, false, "", nil } // -bool: Outcome is known (will be false if there is a tie // -string: // -error getOutcome := func()(bool, string, error){ largestOutcome := "" largestOutcomePoints := 0 tieExists := false for outcomeName, outcomePoints := range traitOutcomeScoresMap{ if (outcomePoints < 1){ // This should never happen, because outcomes points should only be increased by integers which are at least 1 or greater return false, "", errors.New("traitOutcomeScoresMap contains outcomePoints < 1.") } if (outcomePoints > largestOutcomePoints){ largestOutcome = outcomeName largestOutcomePoints = outcomePoints tieExists = false continue } else if (outcomePoints == largestOutcomePoints){ tieExists = true } } if (tieExists == true){ return false, "", nil } return true, largestOutcome, nil } outcomeIsKnown, outcomeName, err := getOutcome() if (err != nil) { return false, 0, 0, nil, false, "", err } if (outcomeIsKnown == false){ return true, quantityOfRulesTested, quantityOfLociKnown, personPassesRulesMap, false, "", nil } return true, quantityOfRulesTested, quantityOfLociKnown, personPassesRulesMap, true, outcomeName, nil } // This function checks to see if a genome will pass a trait rule // Outputs: // -bool: Genome passes trait rule status is known // -bool: Genome passes trait rule // -error func GetGenomePassesDiscreteTraitRuleStatus(ruleLociList []traits.RuleLocus, genomeMap map[int64]locusValue.LocusValue, checkForAliases bool)(bool, bool, error){ // We check to see if genome passes all rule loci // To pass a rule, all of the rule's loci must be passed by the provided genome // We consider a rule Known if the genome either passes all loci, or fails to pass 1 locus // We consider a rule Unknown if any loci are unknown, and there are no rules which are known not to be passed anyLocusIsUnknown := false for _, locusObject := range ruleLociList{ locusRSID := locusObject.LocusRSID locusBasePairKnown, locusBase1, locusBase2, _, _, err := GetLocusValueFromGenomeMap(checkForAliases, genomeMap, locusRSID) if (err != nil) { return false, false, err } if (locusBasePairKnown == false){ anyLocusIsUnknown = true // We keep searching to see if any of the rule's loci are known to not pass continue } locusBasePairJoined := locusBase1 + ";" + locusBase2 locusBasePairsList := locusObject.BasePairsList genomePassesRuleLocus := slices.Contains(locusBasePairsList, locusBasePairJoined) if (genomePassesRuleLocus == false){ // The genome has failed to pass a single rule locus, thus, the rule is not passed return true, false, nil } } if (anyLocusIsUnknown == true){ // The rule is not passed, but it's status is unknown // There were no rules which were known not to pass return false, false, nil } // All rules were passed return true, true, nil } // This function will retrieve the base pair of the locus from the input genome map // We use this function because each rsID has aliases, so we must sometimes check those aliases to find locus values // // Outputs: // -bool: Valid base pair value found // -string: Base 1 Value (Nucleotide base for the SNP) // -string: Base 2 Value (Nucleotide base for the SNP) // -bool: Locus base pair is phased // -locusValue.LocusValue // -error func GetLocusValueFromGenomeMap(checkForAliases bool, inputGenomeMap map[int64]locusValue.LocusValue, locusRSID int64)(bool, string, string, bool, locusValue.LocusValue, error){ // Outputs: // -bool: Locus value found // -locusValue.LocusValue // -error getLocusValue := func()(bool, locusValue.LocusValue, error){ currentLocusValue, exists := inputGenomeMap[locusRSID] if (exists == true){ return true, currentLocusValue, nil } if (checkForAliases == false){ return false, locusValue.LocusValue{}, nil } // We check for aliases anyAliasesExist, rsidAliasesList, err := locusMetadata.GetRSIDAliases(locusRSID) if (err != nil) { return false, locusValue.LocusValue{}, err } if (anyAliasesExist == false){ return false, locusValue.LocusValue{}, nil } for _, rsidAlias := range rsidAliasesList{ currentLocusValue, exists := inputGenomeMap[rsidAlias] if (exists == true){ return true, currentLocusValue, nil } } return false, locusValue.LocusValue{}, nil } locusValueFound, locusValueObject, err := getLocusValue() if (err != nil) { return false, "", "", false, locusValue.LocusValue{}, err } if (locusValueFound == false){ return false, "", "", false, locusValue.LocusValue{}, nil } base1Value := locusValueObject.Base1Value base2Value := locusValueObject.Base2Value locusIsPhased := locusValueObject.LocusIsPhased return true, base1Value, base2Value, locusIsPhased, locusValueObject, nil }