seekia/internal/genetics/geneticAnalysis/geneticAnalysis.go

461 lines
17 KiB
Go
Raw Normal View History

// Package geneticAnalysis defines the genetic analysis objects.
// There are 2 genetic analysis types: Person and Couple.
package geneticAnalysis
import "seekia/internal/genetics/locusValue"
// PersonAnalysis is the genetic analysis object for a person.
type PersonAnalysis struct {
	AnalysisVersion int

	// Identifier of every raw genome (combined genomes are not included).
	AllRawGenomeIdentifiersList [][16]byte

	// True when there is more than 1 raw genome.
	CombinedGenomesExist bool

	// Identifiers of the combined genomes.
	// These only exist when CombinedGenomesExist == true.
	OnlyIncludeSharedGenomeIdentifier    [16]byte
	OnlyExcludeConflictsGenomeIdentifier [16]byte

	// Locus values of each genome.
	// Only the loci that belong in the locusMetadata package are stored here;
	// otherwise genetic analyses would be too large, because they would
	// contain each analyzed raw genome.
	// Map Structure: Genome Identifier -> Genome locus values map (rsID -> Locus Value)
	GenomesMap map[[16]byte]map[int64]locusValue.LocusValue

	// Map Structure: Disease Name -> PersonMonogenicDiseaseInfo
	MonogenicDiseasesMap map[string]PersonMonogenicDiseaseInfo

	// Map Structure: Disease Name -> PersonPolygenicDiseaseInfo
	PolygenicDiseasesMap map[string]PersonPolygenicDiseaseInfo

	// Traits whose outcomes are discrete rather than numeric.
	// For example: Eye color
	// Map Structure: Trait Name -> Trait Info Object
	DiscreteTraitsMap map[string]PersonDiscreteTraitInfo

	// Traits whose outcomes are numeric rather than discrete.
	// For example: Height
	// Map Structure: Trait Name -> Trait Info Object
	NumericTraitsMap map[string]PersonNumericTraitInfo
}
// PersonMonogenicDiseaseInfo collects a person's results for one monogenic
// disease across all of their genomes.
type PersonMonogenicDiseaseInfo struct {
	// Monogenic disease risk information for each genome.
	// An empty map means that no disease info is known.
	// Map Structure: Genome Identifier -> PersonGenomeMonogenicDiseaseInfo
	MonogenicDiseaseInfoMap map[[16]byte]PersonGenomeMonogenicDiseaseInfo

	// True when multiple genomes exist and their results differ.
	ConflictExists bool
}
// PersonGenomeMonogenicDiseaseInfo describes a single genome's results for a
// monogenic disease.
type PersonGenomeMonogenicDiseaseInfo struct {
	// Whether the person has the disease.
	PersonHasDisease bool

	// Number of variants that were tested for this disease.
	QuantityOfVariantsTested int

	// Number of loci that were tested for this disease.
	// 1 locus can have multiple potential variants.
	QuantityOfLociTested int

	// Number of loci which are phased.
	// This is always <= QuantityOfLociTested.
	QuantityOfPhasedLoci int

	// Probability that the person will pass a disease variant,
	// expressed as a percentage between 0-100.
	ProbabilityOfPassingADiseaseVariant int

	// Info about all tested monogenic disease variants for the genome.
	// A missing entry means the genome contains no information for that variant.
	// Map Structure: Variant Identifier -> PersonGenomeMonogenicDiseaseVariantInfo
	VariantsInfoMap map[[3]byte]PersonGenomeMonogenicDiseaseVariantInfo
}
// PersonGenomeMonogenicDiseaseVariantInfo records whether a genome's bases
// carry a monogenic disease variant at the variant's locus.
type PersonGenomeMonogenicDiseaseVariantInfo struct {
	// Whether each base has the variant at the variant's locus.
	Base1HasVariant, Base2HasVariant bool

	// Whether the bases are phased.
	// Not phased: Base1 and Base2 are interchangeable, the numbering carries no meaning.
	// Phased: presumably Base1 was inherited from the father and Base2 from the
	// mother. TODO: confirm which parent each base corresponds to.
	LocusIsPhased bool
}
// PersonPolygenicDiseaseInfo collects a person's results for one polygenic
// disease across all of their genomes.
type PersonPolygenicDiseaseInfo struct {
	// An empty map means that no disease info is known.
	// Map Structure: Genome Identifier -> PersonGenomePolygenicDiseaseInfo
	PolygenicDiseaseInfoMap map[[16]byte]PersonGenomePolygenicDiseaseInfo

	// True when multiple genomes exist and the results from each genome differ.
	ConflictExists bool
}
// PersonGenomePolygenicDiseaseInfo describes a single genome's results for a
// polygenic disease.
type PersonGenomePolygenicDiseaseInfo struct {
	// Total risk score of this disease for the person's genome.
	// This is a number between 1-10.
	RiskScore int

	// Confidence ranges for the predicted risk score.
	// Each entry answers: for the prediction to be accurate X% of the time,
	// how far must the risk score's range be expanded?
	// For example: 50% accuracy requires a +/-2 point range, 80% accuracy requires a +/-5 point range
	// Map Structure: Accuracy probability (0-100) -> Amount to add to value in both +/- directions so prediction is that accurate
	ConfidenceRangesMap map[int]float64

	// Quantity of loci tested for this disease.
	QuantityOfLociKnown int

	// Quantity of phased loci; presumably a subset of the loci counted
	// above - TODO confirm.
	QuantityOfPhasedLoci int
}
// PersonDiscreteTraitInfo collects a person's results for one discrete trait
// across all of their genomes.
type PersonDiscreteTraitInfo struct {
	// The person's trait info for each genome.
	// An empty map means that no trait info is known.
	// Map Structure: Genome Identifier -> PersonGenomeDiscreteTraitInfo
	TraitInfoMap map[[16]byte]PersonGenomeDiscreteTraitInfo

	// True when multiple genomes exist and the results from each genome differ.
	ConflictExists bool
}
// PersonGenomeDiscreteTraitInfo describes a single genome's results for a
// discrete trait.
//
// Both analysis methods may exist in the results of a trait analysis.
// The GUI, however, only displays the results from one of the methods:
// the neural network prediction is always prioritized over the rule-based prediction.
type PersonGenomeDiscreteTraitInfo struct {
	// True if it is possible to analyze this trait using a neural network.
	NeuralNetworkExists bool

	// True if a neural network analysis was performed for this genome.
	// This means that at least 1 locus for this trait was contained in the genome.
	NeuralNetworkAnalysisExists bool

	// Neural network results; presumably only meaningful when
	// NeuralNetworkAnalysisExists == true - TODO confirm.
	NeuralNetworkAnalysis PersonGenomeDiscreteTraitInfo_NeuralNetwork

	// True if it is possible to analyze this trait using rules.
	AnyRulesExist bool

	// True if a rules-based analysis was performed for this genome.
	// This means that all of the loci for at least 1 rule for this trait were contained in the genome.
	RulesAnalysisExists bool

	// Rules-based results; presumably only meaningful when
	// RulesAnalysisExists == true - TODO confirm.
	RulesAnalysis PersonGenomeDiscreteTraitInfo_Rules
}
// PersonGenomeDiscreteTraitInfo_NeuralNetwork holds the neural network
// prediction of a discrete trait for a single genome.
type PersonGenomeDiscreteTraitInfo_NeuralNetwork struct {
	// Predicted outcome (Example: "Blue")
	PredictedOutcome string

	// Probability (0-100) that the outcome from the neural network is true.
	PredictionConfidence int

	// Loci counters (known / phased).
	QuantityOfLociKnown, QuantityOfPhasedLoci int
}
// PersonGenomeDiscreteTraitInfo_Rules holds the rules-based prediction of a
// discrete trait for a single genome.
type PersonGenomeDiscreteTraitInfo_Rules struct {
	// Map Structure: Rule Identifier -> Genome Passes rule (true if the genome passes the rule)
	GenomePassesRulesMap map[[3]byte]bool

	// True if there was not a tie between summed rule outcome scores.
	// It is possible to have some tested rules without a known outcome.
	PredictedOutcomeExists bool

	// The outcome that was predicted.
	// Example: "Intolerant"
	PredictedOutcome string

	// This should be len(GenomePassesRulesMap).
	QuantityOfRulesTested int

	// Only counts the loci which are used for rules.
	// For example, loci that are only used in neural-network-based prediction are not counted.
	QuantityOfLociKnown int
}
// PersonNumericTraitInfo collects a person's results for one numeric trait
// across all of their genomes.
type PersonNumericTraitInfo struct {
	// The person's trait info for each genome.
	// An empty map means that no trait info is known.
	// Map Structure: Genome Identifier -> PersonGenomeNumericTraitInfo
	TraitInfoMap map[[16]byte]PersonGenomeNumericTraitInfo

	// True when multiple genomes exist and the results from each genome differ.
	ConflictExists bool
}
// PersonGenomeNumericTraitInfo describes a single genome's results for a
// numeric trait.
type PersonGenomeNumericTraitInfo struct {
	// Predicted outcome (Example: The predicted height for this person, in centimeters)
	PredictedOutcome float64

	// Confidence ranges for the predicted value.
	// Each entry answers: for the prediction to be accurate X% of the time,
	// how far must the predicted value's range be expanded?
	// For example: 50% accuracy requires a +/-5 point range, 80% accuracy requires a +/-15 point range
	// Map Structure: Accuracy probability (0-100) -> Amount to add to value in both +/- directions so prediction is that accurate
	ConfidenceRangesMap map[int]float64

	// Loci counters (known / phased).
	QuantityOfLociKnown, QuantityOfPhasedLoci int
}
// CoupleAnalysis is the genetic analysis object for a couple.
type CoupleAnalysis struct {
	AnalysisVersion int

	Pair1Person1GenomeIdentifier, Pair1Person2GenomeIdentifier [16]byte

	// Only true if at least 1 person has more than 1 genome.
	SecondPairExists bool

	// Empty unless SecondPairExists == true.
	Pair2Person1GenomeIdentifier, Pair2Person2GenomeIdentifier [16]byte

	Person1HasMultipleGenomes, Person2HasMultipleGenomes bool

	// Empty unless Person1HasMultipleGenomes == true.
	Person1OnlyExcludeConflictsGenomeIdentifier [16]byte
	Person1OnlyIncludeSharedGenomeIdentifier    [16]byte

	// Empty unless Person2HasMultipleGenomes == true.
	Person2OnlyExcludeConflictsGenomeIdentifier [16]byte
	Person2OnlyIncludeSharedGenomeIdentifier    [16]byte

	// Map Structure: Disease Name -> OffspringMonogenicDiseaseInfo
	MonogenicDiseasesMap map[string]OffspringMonogenicDiseaseInfo

	// Map Structure: Disease Name -> OffspringPolygenicDiseaseInfo
	PolygenicDiseasesMap map[string]OffspringPolygenicDiseaseInfo

	// Traits with discrete outcomes, such as Eye Color.
	// Map Structure: Trait Name -> Trait Info Object
	DiscreteTraitsMap map[string]OffspringDiscreteTraitInfo

	// Traits with numeric outcomes, such as Height.
	// Map Structure: Trait Name -> Trait Info Object
	NumericTraitsMap map[string]OffspringNumericTraitInfo
}
// OffspringMonogenicDiseaseInfo collects a couple's offspring results for one
// monogenic disease across all genome pairs.
type OffspringMonogenicDiseaseInfo struct {
	// Quantity of variants tested in each person's genome.
	// Map Structure: Genome Identifier -> Number of variants tested
	QuantityOfVariantsTestedMap map[[16]byte]int

	// Offspring disease probabilities for each genome pair.
	// A genome pair is a concatenation of two genome identifiers.
	// A missing entry means the probabilities are unknown for that genome pair.
	// Map Structure: Genome Pair Identifier -> OffspringGenomePairMonogenicDiseaseInfo
	MonogenicDiseaseInfoMap map[[32]byte]OffspringGenomePairMonogenicDiseaseInfo

	// True when more than 1 genome pair exists and the results from each genome pair differ.
	ConflictExists bool
}
// OffspringGenomePairMonogenicDiseaseInfo describes the offspring results of
// one genome pair for a monogenic disease.
//
// At least 1 variant's information is needed from either person to include
// this object in the MonogenicDiseaseInfoMap.
type OffspringGenomePairMonogenicDiseaseInfo struct {
	ProbabilityOffspringHasDiseaseIsKnown bool

	// Probability that the offspring will have the disease.
	// A number between 0-100%.
	ProbabilityOffspringHasDisease int

	ProbabilityOffspringHasVariantIsKnown bool

	// Probability that the offspring will have a variant.
	// A number between 0-100%.
	ProbabilityOffspringHasVariant int

	// Map Structure: Variant Identifier -> OffspringMonogenicDiseaseVariantInfo
	VariantsInfoMap map[[3]byte]OffspringMonogenicDiseaseVariantInfo
}
// OffspringMonogenicDiseaseVariantInfo holds lower and upper probability
// bounds for the offspring having 0, 1 or 2 mutations of a variant.
// Every field is a number between 0-100%.
type OffspringMonogenicDiseaseVariantInfo struct {
	ProbabilityOf0MutationsLowerBound, ProbabilityOf0MutationsUpperBound int

	ProbabilityOf1MutationLowerBound, ProbabilityOf1MutationUpperBound int

	ProbabilityOf2MutationsLowerBound, ProbabilityOf2MutationsUpperBound int
}
// OffspringPolygenicDiseaseInfo collects a couple's offspring results for one
// polygenic disease across all genome pairs.
type OffspringPolygenicDiseaseInfo struct {
	// Polygenic disease info for each genome pair.
	// Map Structure: Genome Pair Identifier -> OffspringGenomePairPolygenicDiseaseInfo
	PolygenicDiseaseInfoMap map[[32]byte]OffspringGenomePairPolygenicDiseaseInfo

	// True when more than 1 genome pair exists and the results from each genome pair differ.
	ConflictExists bool
}
// OffspringGenomePairPolygenicDiseaseInfo describes the offspring results of
// one genome pair for a polygenic disease.
type OffspringGenomePairPolygenicDiseaseInfo struct {
	// The offspring's average risk score, a number between 1-10.
	// 1 == lowest risk, 10 == highest risk
	OffspringAverageRiskScore int

	// Confidence ranges for the predicted risk score.
	// Each entry answers: for the prediction to be accurate X% of the time,
	// how far must the risk score's range be expanded?
	// For example: 50% accuracy requires a +/-2 point range, 80% accuracy requires a +/-3 point range
	// Map Structure: Accuracy probability (0-100) -> Amount to add to value in both +/- directions so prediction is that accurate
	PredictionConfidenceRangesMap map[int]float64

	QuantityOfLociKnown int

	// Quantity of loci from both parents that are phased.
	// For example, if there are 10 loci for this disease, and one parent has
	// 10 phased loci and the other has 5, this field has a value of 15.
	QuantityOfParentalPhasedLoci int

	// Prospective offspring risk scores.
	// Useful for plotting on a graph to understand the standard deviation of risk.
	SampleOffspringRiskScoresList []int
}
// OffspringDiscreteTraitInfo collects a couple's offspring results for one
// discrete trait across all genome pairs.
type OffspringDiscreteTraitInfo struct {
	// Trait info for each genome pair.
	// Map Structure: Genome Pair Identifier -> OffspringGenomePairDiscreteTraitInfo
	TraitInfoMap map[[32]byte]OffspringGenomePairDiscreteTraitInfo

	// Presumably true when the results from different genome pairs
	// differ - TODO confirm.
	ConflictExists bool
}
// OffspringGenomePairDiscreteTraitInfo describes the offspring results of one
// genome pair for a discrete trait.
//
// Both analysis methods may exist in the results of a trait analysis.
// The GUI, however, only displays the results from one of the methods:
// the neural network prediction is always prioritized over the rule-based prediction.
type OffspringGenomePairDiscreteTraitInfo struct {
	// True if it is possible to analyze this trait using a neural network.
	NeuralNetworkExists bool

	// True if a neural network analysis was performed for this genome pair.
	// This means that at least 1 locus for this trait was contained in both of the genomes in the pair.
	NeuralNetworkAnalysisExists bool

	// Neural network results; presumably only meaningful when
	// NeuralNetworkAnalysisExists == true - TODO confirm.
	NeuralNetworkAnalysis OffspringGenomePairDiscreteTraitInfo_NeuralNetwork

	// True if it is possible to analyze this trait using rules.
	RulesExist bool

	// True if a rules-based analysis was performed for this genome pair.
	// This means that all of the loci for at least 1 rule for this trait were contained in both of the genomes in the pair.
	// Also, none of the offspring have an unknown outcome caused by an outcome score tie.
	RulesAnalysisExists bool

	// Rules-based results; presumably only meaningful when
	// RulesAnalysisExists == true - TODO confirm.
	RulesAnalysis OffspringGenomePairDiscreteTraitInfo_Rules
}
// OffspringGenomePairDiscreteTraitInfo_NeuralNetwork holds the neural network
// prediction of a discrete trait for the offspring of one genome pair.
type OffspringGenomePairDiscreteTraitInfo_NeuralNetwork struct {
	// Map Structure: Outcome Name -> Outcome Probability (0-100)
	// Example: "Intolerant" -> 5
	OffspringOutcomeProbabilitiesMap map[string]int

	// Probability (0-100) that each outcome from the neural network is true.
	// This is an average of the confidence for each of the calculated 100 outcome probabilities.
	AverageConfidence int

	QuantityOfLociKnown int

	// Quantity of loci from both parents that are phased.
	// For example, if there are 10 loci for this trait, and one parent has
	// 10 phased loci and the other has 5, this field has a value of 15.
	QuantityOfParentalPhasedLoci int
}
// OffspringGenomePairDiscreteTraitInfo_Rules holds the rules-based prediction
// of a discrete trait for the offspring of one genome pair.
type OffspringGenomePairDiscreteTraitInfo_Rules struct {
	// Map Structure: Outcome Name -> Outcome Probability (0-100)
	// Example: "Intolerant" -> 5
	OffspringOutcomeProbabilitiesMap map[string]int

	// Probability that the offspring will pass each rule.
	// Each value is a number between 0-100%.
	// Map Structure: Rule Identifier -> Offspring Probability Of Passing Rule
	ProbabilityOfPassingRulesMap map[[3]byte]int

	// This should be len(ProbabilityOfPassingRulesMap).
	QuantityOfRulesTested int

	// Only counts the loci which are used for rules.
	// For example, loci that are only used in neural-network-based prediction are not counted.
	QuantityOfLociKnown int
}
// OffspringNumericTraitInfo collects a couple's offspring results for one
// numeric trait across all genome pairs.
type OffspringNumericTraitInfo struct {
	// Trait info for each genome pair.
	// Map Structure: Genome Pair Identifier -> OffspringGenomePairNumericTraitInfo
	TraitInfoMap map[[32]byte]OffspringGenomePairNumericTraitInfo

	// Presumably true when the results from different genome pairs
	// differ - TODO confirm.
	ConflictExists bool
}
// OffspringGenomePairNumericTraitInfo describes the offspring results of one
// genome pair for a numeric trait.
type OffspringGenomePairNumericTraitInfo struct {
	// Average outcome for the offspring.
	// For example, the average height for an offspring between these 2 people.
	OffspringAverageOutcome float64

	// Confidence ranges for the predicted value.
	// Each entry answers: for the prediction to be accurate X% of the time,
	// how far must the predicted value's range be expanded?
	// For example: 50% accuracy requires a +/-5 point range, 80% accuracy requires a +/-15 point range
	// Map Structure: Accuracy probability (0-100) -> Amount to add to value in both +/- directions so prediction is that accurate
	PredictionConfidenceRangesMap map[int]float64

	QuantityOfLociKnown int

	// Quantity of loci from both parents that are phased.
	// For example, if there are 10 loci for this trait, and one parent has
	// 10 phased loci and the other has 5, this field has a value of 15.
	QuantityOfParentalPhasedLoci int

	// 100 offspring outcomes for 100 prospective offspring from the genome pair.
	// Example: A list of heights for 100 prospective offspring
	SampleOffspringOutcomesList []float64
}