From ea82419b3886e29d998d0f2983860efcc5ef47e6 Mon Sep 17 00:00:00 2001 From: Simon Sarasova Date: Thu, 15 Aug 2024 12:14:23 +0000 Subject: [PATCH] Implemented in-memory storage of trained neural network objects. Trained neural network objects now only have to be created once, so predictions are faster. --- Changelog.md | 1 + Contributors.md | 2 +- gui/viewAnalysisGui_Couple.go | 14 +- gui/viewAnalysisGui_Person.go | 8 +- gui/viewProfileGui.go | 10 +- internal/appUsers/appUsers.go | 4 + .../createCoupleGeneticAnalysis.go | 8 +- .../createCoupleGeneticAnalysis_test.go | 17 +- .../createPersonGeneticAnalysis.go | 10 +- .../createPersonGeneticAnalysis_test.go | 11 + .../geneticPrediction/geneticPrediction.go | 613 +------------ .../geneticPrediction_test.go | 5 +- .../geneticPredictionModels.go | 230 +++++ .../geneticPredictionModels.go | 121 --- .../geneticPredictionModels_test.go | 63 -- .../AutismModelAccuracy.gob | Bin .../EyeColorModelAccuracy.gob | Bin .../HeightModelAccuracy.gob | Bin .../HomosexualnessModelAccuracy.gob | Bin .../LactoseToleranceModelAccuracy.gob | Bin .../ObesityModelAccuracy.gob | Bin .../predictionModels/AutismModel.gob | Bin .../predictionModels/EyeColorModel.gob | Bin .../predictionModels/HeightModel.gob | Bin .../predictionModels/HomosexualnessModel.gob | Bin .../LactoseToleranceModel.gob | Bin .../predictionModels/ObesityModel.gob | Bin .../trainedPredictionModels.go | 805 ++++++++++++++++++ .../trainedPredictionModels_test.go | 172 ++++ .../createGeneticModels.go | 38 +- 30 files changed, 1307 insertions(+), 825 deletions(-) create mode 100644 internal/genetics/geneticPredictionModels/geneticPredictionModels.go delete mode 100644 resources/geneticPredictionModels/geneticPredictionModels.go delete mode 100644 resources/geneticPredictionModels/geneticPredictionModels_test.go rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModelAccuracies/AutismModelAccuracy.gob (100%) rename 
resources/{geneticPredictionModels => trainedPredictionModels}/predictionModelAccuracies/EyeColorModelAccuracy.gob (100%) rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModelAccuracies/HeightModelAccuracy.gob (100%) rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModelAccuracies/HomosexualnessModelAccuracy.gob (100%) rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModelAccuracies/LactoseToleranceModelAccuracy.gob (100%) rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModelAccuracies/ObesityModelAccuracy.gob (100%) rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModels/AutismModel.gob (100%) rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModels/EyeColorModel.gob (100%) rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModels/HeightModel.gob (100%) rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModels/HomosexualnessModel.gob (100%) rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModels/LactoseToleranceModel.gob (100%) rename resources/{geneticPredictionModels => trainedPredictionModels}/predictionModels/ObesityModel.gob (100%) create mode 100644 resources/trainedPredictionModels/trainedPredictionModels.go create mode 100644 resources/trainedPredictionModels/trainedPredictionModels_test.go diff --git a/Changelog.md b/Changelog.md index 486f8df..72ec9c2 100644 --- a/Changelog.md +++ b/Changelog.md @@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log. ## Unversioned Changes +* Implemented in-memory storage of trained neural network objects. Trained neural network objects now only have to be created once, so predictions are faster. - *Simon Sarasova* * Removed link to Seekia's defunct Tor onionsite. 
- *Simon Sarasova* * Improved Whitepaper.md and Future-Plans.md. - *Simon Sarasova* * Created the GetUserGenomeLocusValuesMapFromProfile function and used it to remove some duplicated code. - *Simon Sarasova* diff --git a/Contributors.md b/Contributors.md index 0f91e26..2fcac58 100644 --- a/Contributors.md +++ b/Contributors.md @@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th Name | Date Of First Commit | Number Of Commits --- | --- | --- -Simon Sarasova | June 13, 2023 | 284 \ No newline at end of file +Simon Sarasova | June 13, 2023 | 285 \ No newline at end of file diff --git a/gui/viewAnalysisGui_Couple.go b/gui/viewAnalysisGui_Couple.go index 2763983..6908188 100644 --- a/gui/viewAnalysisGui_Couple.go +++ b/gui/viewAnalysisGui_Couple.go @@ -10,7 +10,7 @@ import "fyne.io/fyne/v2/theme" import "fyne.io/fyne/v2/widget" import "fyne.io/fyne/v2/canvas" -import "seekia/resources/geneticPredictionModels" +import "seekia/resources/trainedPredictionModels" import "seekia/resources/geneticReferences/monogenicDiseases" import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" @@ -1018,7 +1018,7 @@ func setViewCoupleGeneticAnalysisPolygenicDiseasesPage(window fyne.Window, perso diseaseName := diseaseObject.DiseaseName - neuralNetworkExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(diseaseName) + neuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(diseaseName) if (neuralNetworkExists == false){ // We cannot analyze this disease continue @@ -1150,7 +1150,7 @@ func setViewCoupleGeneticAnalysisPolygenicDiseaseDetailsPage(window fyne.Window, currentPage := func(){setViewCoupleGeneticAnalysisPolygenicDiseaseDetailsPage(window, person1Name, person2Name, person1AnalysisObject, person2AnalysisObject, coupleAnalysisObject, diseaseName, previousPage)} - neuralNetworkExists, _ := 
geneticPredictionModels.GetGeneticPredictionModelBytes(diseaseName) + neuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(diseaseName) if (neuralNetworkExists == false){ // We cannot analyze this disease setErrorEncounteredPage(window, errors.New("setViewCoupleGeneticAnalysisPolygenicDiseaseDetailsPage called non-analyzable trait: " + diseaseName), previousPage) @@ -1966,7 +1966,7 @@ func setViewCoupleGeneticAnalysisDiscreteTraitDetailsPage(window fyne.Window, pe return } - neuralNetworkExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) + neuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) emptyLabel1 := widget.NewLabel("") emptyLabel2 := widget.NewLabel("") @@ -2418,7 +2418,7 @@ func setViewCoupleGeneticAnalysisDiscreteTraitGenomePairDetailsPage(window fyne. return } - neuralNetworkExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) + neuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) predictedOutcomeHelpButton := widget.NewButtonWithIcon("", theme.QuestionIcon(), func(){ @@ -2855,7 +2855,7 @@ func setViewCoupleGeneticAnalysisNumericTraitsPage(window fyne.Window, person1Na traitName := traitObject.TraitName - neuralNetworkExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) + neuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) if (neuralNetworkExists == false){ // We cannot analyze this trait continue @@ -3030,7 +3030,7 @@ func setViewCoupleGeneticAnalysisNumericTraitDetailsPage(window fyne.Window, per }) traitNameRow := container.NewHBox(layout.NewSpacer(), traitNameLabel, traitNameText, traitNameInfoButton, layout.NewSpacer()) - neuralNetworkExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) + neuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) if 
(neuralNetworkExists == false){ // We cannot analyze this trait setErrorEncounteredPage(window, errors.New("setViewCoupleGeneticAnalysisNumericTraitDetailsPage called non-analyzable trait: " + traitName), previousPage) diff --git a/gui/viewAnalysisGui_Person.go b/gui/viewAnalysisGui_Person.go index 3426b48..da7bea5 100644 --- a/gui/viewAnalysisGui_Person.go +++ b/gui/viewAnalysisGui_Person.go @@ -13,7 +13,7 @@ import "fyne.io/fyne/v2/layout" import "fyne.io/fyne/v2/theme" import "fyne.io/fyne/v2/widget" -import "seekia/resources/geneticPredictionModels" +import "seekia/resources/trainedPredictionModels" import "seekia/resources/geneticReferences/monogenicDiseases" import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" @@ -942,7 +942,7 @@ func setViewPersonGeneticAnalysisPolygenicDiseasesPage(window fyne.Window, perso diseaseName := diseaseObject.DiseaseName - neuralNetworkExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(diseaseName) + neuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(diseaseName) if (neuralNetworkExists == false){ // We can't analyze this trait continue @@ -1928,7 +1928,7 @@ func setViewPersonGeneticAnalysisDiscreteTraitDetailsPage(window fyne.Window, pe if (err != nil){ return nil, err } } - neuralNetworkExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) + neuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) if (neuralNetworkExists == true){ @@ -2440,7 +2440,7 @@ func setViewPersonGeneticAnalysisNumericTraitsPage(window fyne.Window, personIde traitNameText := getBoldLabelCentered(traitName) - neuralNetworkExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) + neuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) if (neuralNetworkExists == false){ // This trait has no neural network // We cannot analyze it 
diff --git a/gui/viewProfileGui.go b/gui/viewProfileGui.go index 8bedba3..874115e 100644 --- a/gui/viewProfileGui.go +++ b/gui/viewProfileGui.go @@ -12,7 +12,7 @@ import "fyne.io/fyne/v2/widget" import "seekia/resources/worldLanguages" import "seekia/resources/worldLocations" -import "seekia/resources/geneticPredictionModels" +import "seekia/resources/trainedPredictionModels" import "seekia/resources/geneticReferences/monogenicDiseases" import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" @@ -2505,7 +2505,7 @@ func setViewMateProfilePage_TotalDiseaseRisk(window fyne.Window, getAnyUserProfi totalNumberOfPolygenicDiseases := 0 for _, diseaseName := range allPolygenicDiseaseNamesList{ - predictionModelExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(diseaseName) + predictionModelExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(diseaseName) if (predictionModelExists == true){ totalNumberOfPolygenicDiseases += 1 } @@ -3025,7 +3025,7 @@ func setViewMateProfilePage_PolygenicDiseases(window fyne.Window, userOrOffsprin diseaseName := diseaseObject.DiseaseName diseaseLociList := diseaseObject.LociList - predictionModelExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(diseaseName) + predictionModelExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(diseaseName) if (predictionModelExists == false){ // Prediction is not possible for this disease continue @@ -3314,7 +3314,7 @@ func setViewMateProfilePage_DiscreteGeneticTraits(window fyne.Window, userOrOffs continue } - traitNeuralNetworkExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) + traitNeuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) if (traitNeuralNetworkExists == false && totalNumberOfTraitRules == 0){ // We are not able to analyze these traits yet continue @@ -4118,7 +4118,7 @@ func 
setViewMateProfilePage_NumericGeneticTraits(window fyne.Window, userOrOffsp continue } - traitNeuralNetworkExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) + traitNeuralNetworkExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) if (traitNeuralNetworkExists == false){ // We are not able to analyze these traits yet continue diff --git a/internal/appUsers/appUsers.go b/internal/appUsers/appUsers.go index 8e8c38c..8402939 100644 --- a/internal/appUsers/appUsers.go +++ b/internal/appUsers/appUsers.go @@ -12,6 +12,7 @@ import "seekia/resources/geneticReferences/locusMetadata" import "seekia/resources/geneticReferences/monogenicDiseases" import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" +import "seekia/resources/trainedPredictionModels" import "seekia/resources/worldLanguages" import "seekia/resources/worldLocations" @@ -401,6 +402,9 @@ func initializeApplicationVariables()error{ err = traits.InitializeTraitVariables() if (err != nil) { return err } + err = trainedPredictionModels.InitializeTrainedPredictionModels() + if (err != nil) { return err } + err = profileFormat.InitializeProfileFormatVariables() if (err != nil) { return err } diff --git a/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis.go b/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis.go index da8cfb0..7f1863e 100644 --- a/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis.go +++ b/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis.go @@ -12,7 +12,7 @@ package createCoupleGeneticAnalysis // TODO: We want to eventually use neural nets for polygenic disease analysis (see geneticPrediction.go) // This is only possible once we get access to the necessary training data -import "seekia/resources/geneticPredictionModels" +import "seekia/resources/trainedPredictionModels" import 
"seekia/resources/geneticReferences/locusMetadata" import "seekia/resources/geneticReferences/monogenicDiseases" import "seekia/resources/geneticReferences/polygenicDiseases" @@ -974,7 +974,7 @@ func GetOffspringPolygenicDiseaseAnalysis(diseaseObject polygenicDiseases.Polyge diseaseName := diseaseObject.DiseaseName - modelExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(diseaseName) + modelExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(diseaseName) if (modelExists == false){ // Prediction is not possible for this trait return false, false, 0, nil, nil, 0, 0, nil @@ -1083,7 +1083,7 @@ func GetOffspringDiscreteTraitAnalysis_NeuralNetwork(traitObject traits.Trait, p return false, false, nil, 0, 0, 0, errors.New("GetOffspringDiscreteTraitAnalysis_NeuralNetwork called with non-discrete trait.") } - modelExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) + modelExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) if (modelExists == false){ // Neural network prediction is not possible for this trait return false, false, nil, 0, 0, 0, nil @@ -1270,7 +1270,7 @@ func GetOffspringNumericTraitAnalysis(traitObject traits.Trait, person1LocusValu return false, false, 0, nil, nil, 0, 0, errors.New("GetOffspringNumericTraitAnalysis called with non-numeric trait.") } - modelExists, _ := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) + modelExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) if (modelExists == false){ // Prediction is not possible for this trait return false, false, 0, nil, nil, 0, 0, nil diff --git a/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go b/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go index abd3c84..68d8607 100644 --- a/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go +++ 
b/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go @@ -8,6 +8,7 @@ import "seekia/resources/geneticReferences/locusMetadata" import "seekia/resources/geneticReferences/monogenicDiseases" import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" +import "seekia/resources/trainedPredictionModels" import "seekia/internal/genetics/createRawGenomes" import "seekia/internal/genetics/prepareRawGenomes" @@ -36,6 +37,11 @@ func TestCreateCoupleGeneticAnalysis_SingleGenomes(t *testing.T){ t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } + err = trainedPredictionModels.InitializeTrainedPredictionModels() + if (err != nil) { + t.Fatalf("InitializeTrainedPredictionModels failed: " + err.Error()) + } + getPersonGenomesList := func()([]prepareRawGenomes.RawGenomeWithMetadata, error){ genomeIdentifier, err := helpers.GetNewRandom16ByteArray() @@ -119,6 +125,11 @@ func TestCreateCoupleGeneticAnalysis_SingleAndMultipleGenomes(t *testing.T){ t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } + err = trainedPredictionModels.InitializeTrainedPredictionModels() + if (err != nil) { + t.Fatalf("InitializeTrainedPredictionModels failed: " + err.Error()) + } + getPersonGenomesList := func(addSecondGenome bool)([]prepareRawGenomes.RawGenomeWithMetadata, error){ genomeIdentifier1, err := helpers.GetNewRandom16ByteArray() @@ -205,7 +216,6 @@ func TestCreateCoupleGeneticAnalysis_SingleAndMultipleGenomes(t *testing.T){ } - func TestCreateCoupleGeneticAnalysis_MultipleGenomes(t *testing.T){ err := locusMetadata.InitializeLocusMetadataVariables() @@ -225,6 +235,11 @@ func TestCreateCoupleGeneticAnalysis_MultipleGenomes(t *testing.T){ t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } + err = trainedPredictionModels.InitializeTrainedPredictionModels() + if (err != nil) { + t.Fatalf("InitializeTrainedPredictionModels failed: " + err.Error()) + } + getPersonGenomesList := 
func()([]prepareRawGenomes.RawGenomeWithMetadata, error){ genomeIdentifier1, err := helpers.GetNewRandom16ByteArray() diff --git a/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis.go b/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis.go index 7e4a318..3c55666 100644 --- a/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis.go +++ b/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis.go @@ -15,10 +15,10 @@ import "seekia/resources/geneticReferences/locusMetadata" import "seekia/resources/geneticReferences/monogenicDiseases" import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" +import "seekia/resources/trainedPredictionModels" import "seekia/internal/encoding" import "seekia/internal/genetics/geneticAnalysis" -import "seekia/internal/genetics/geneticPrediction" import "seekia/internal/genetics/locusValue" import "seekia/internal/genetics/prepareRawGenomes" import "seekia/internal/helpers" @@ -790,7 +790,7 @@ func GetPersonGenomePolygenicDiseaseAnalysis(diseaseObject polygenicDiseases.Pol diseaseName := diseaseObject.DiseaseName - neuralNetworkModelExists, riskScorePredictionIsPossible, predictedRiskScore, predictionAccuracyRangesMap, quantityOfLociKnown, quantityOfPhasedLoci, err := geneticPrediction.GetNeuralNetworkNumericAttributePredictionFromGenomeMap(diseaseName, diseaseLociList, genomeLocusValuesMap) + neuralNetworkModelExists, riskScorePredictionIsPossible, predictedRiskScore, predictionAccuracyRangesMap, quantityOfLociKnown, quantityOfPhasedLoci, err := trainedPredictionModels.GetNeuralNetworkNumericAttributePredictionFromGenomeMap(diseaseName, diseaseLociList, genomeLocusValuesMap) if (err != nil) { return false, false, 0, nil, 0, 0, err } if (neuralNetworkModelExists == false){ return false, false, 0, nil, 0, 0, nil @@ -1028,7 +1028,9 @@ func GetGenomeDiscreteTraitAnalysis_NeuralNetwork(traitObject 
traits.Trait, geno traitName := traitObject.TraitName - neuralNetworkModelExists, traitPredictionIsPossible, predictedOutcome, predictionConfidence, quantityOfLociKnown, quantityOfPhasedLoci, err := geneticPrediction.GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap(traitName, genomeLocusValuesMap) + traitLociList := traitObject.LociList + + neuralNetworkModelExists, traitPredictionIsPossible, predictedOutcome, predictionConfidence, quantityOfLociKnown, quantityOfPhasedLoci, err := trainedPredictionModels.GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap(traitName, traitLociList, genomeLocusValuesMap) if (err != nil) { return false, false, "", 0, 0, 0, err } if (neuralNetworkModelExists == false){ return false, false, "", 0, 0, 0, nil @@ -1271,7 +1273,7 @@ func GetGenomeNumericTraitAnalysis(traitObject traits.Trait, genomeMap map[int64 traitName := traitObject.TraitName - neuralNetworkModelExists, traitPredictionIsPossible, predictedOutcome, predictionAccuracyRangesMap, quantityOfLociKnown, quantityOfPhasedLoci, err := geneticPrediction.GetNeuralNetworkNumericAttributePredictionFromGenomeMap(traitName, traitLociList, genomeLocusValuesMap) + neuralNetworkModelExists, traitPredictionIsPossible, predictedOutcome, predictionAccuracyRangesMap, quantityOfLociKnown, quantityOfPhasedLoci, err := trainedPredictionModels.GetNeuralNetworkNumericAttributePredictionFromGenomeMap(traitName, traitLociList, genomeLocusValuesMap) if (err != nil) { return false, false, 0, nil, 0, 0, err } if (neuralNetworkModelExists == false){ return false, false, 0, nil, 0, 0, nil diff --git a/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go b/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go index 90d699a..b112633 100644 --- a/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go +++ b/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go @@ -8,6 +8,7 @@ import 
"seekia/resources/geneticReferences/locusMetadata" import "seekia/resources/geneticReferences/monogenicDiseases" import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" +import "seekia/resources/trainedPredictionModels" import "seekia/internal/genetics/createRawGenomes" import "seekia/internal/genetics/prepareRawGenomes" @@ -36,6 +37,11 @@ func TestCreatePersonGeneticAnalysis_SingleGenome(t *testing.T){ t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } + err = trainedPredictionModels.InitializeTrainedPredictionModels() + if (err != nil) { + t.Fatalf("InitializeTrainedPredictionModels failed: " + err.Error()) + } + genomeIdentifier, err := helpers.GetNewRandom16ByteArray() if (err != nil) { t.Fatalf("Failed to get random 16 byte array: " + err.Error()) @@ -103,6 +109,11 @@ func TestCreatePersonGeneticAnalysis_MultipleGenomes(t *testing.T){ t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } + err = trainedPredictionModels.InitializeTrainedPredictionModels() + if (err != nil) { + t.Fatalf("InitializeTrainedPredictionModels failed: " + err.Error()) + } + numberOfGenomesToAdd := helpers.GetRandomIntWithinRange(2, 5) genomesList := make([]prepareRawGenomes.RawGenomeWithMetadata, 0, numberOfGenomesToAdd) diff --git a/internal/genetics/geneticPrediction/geneticPrediction.go b/internal/genetics/geneticPrediction/geneticPrediction.go index a75e4a5..968655c 100644 --- a/internal/genetics/geneticPrediction/geneticPrediction.go +++ b/internal/genetics/geneticPrediction/geneticPrediction.go @@ -12,8 +12,8 @@ package geneticPrediction import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" -import "seekia/resources/geneticPredictionModels" +import "seekia/internal/genetics/geneticPredictionModels" import "seekia/internal/genetics/locusValue" import "seekia/internal/genetics/readBiobankData" import "seekia/internal/helpers" @@ -29,20 +29,6 @@ 
import "slices" import "errors" -type NeuralNetwork struct{ - - // ExprGraph is a data structure for a directed acyclic graph (of expressions). - graph *gorgonia.ExprGraph - - // These are the weights for each layer of neurons - weights1 *gorgonia.Node - weights2 *gorgonia.Node - weights3 *gorgonia.Node - - // This is the computed prediction - prediction *gorgonia.Node -} - // This struct stores a user's training data // Each TrainingData represents a single data example // For example, the InputLayer is a column of neurons representing a user's genetics, @@ -92,495 +78,12 @@ func DecodeBytesToTrainingDataObject(inputTrainingData []byte)(TrainingData, err return newTrainingData, nil } -// We use this to store a neural network's weights as a .gob file -type neuralNetworkForEncoding struct{ - - // These are the weights for each layer of neurons - Weights1 []float32 - Weights2 []float32 - Weights3 []float32 - - Weights1Rows int - Weights1Columns int - Weights2Rows int - Weights2Columns int - Weights3Rows int - Weights3Columns int -} - -func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, error){ - - weights1 := inputNeuralNetwork.weights1 - weights2 := inputNeuralNetwork.weights2 - weights3 := inputNeuralNetwork.weights3 - - weights1Slice := weights1.Value().Data().([]float32) - weights2Slice := weights2.Value().Data().([]float32) - weights3Slice := weights3.Value().Data().([]float32) - - weights1Rows := weights1.Shape()[0] - weights1Columns := weights1.Shape()[1] - weights2Rows := weights2.Shape()[0] - weights2Columns := weights2.Shape()[1] - weights3Rows := weights3.Shape()[0] - weights3Columns := weights3.Shape()[1] - - newNeuralNetworkForEncoding := neuralNetworkForEncoding{ - Weights1: weights1Slice, - Weights2: weights2Slice, - Weights3: weights3Slice, - - Weights1Rows: weights1Rows, - Weights1Columns: weights1Columns, - Weights2Rows: weights2Rows, - Weights2Columns: weights2Columns, - Weights3Rows: weights3Rows, - Weights3Columns: 
weights3Columns, - } - - buffer := new(bytes.Buffer) - - encoder := gob.NewEncoder(buffer) - - err := encoder.Encode(newNeuralNetworkForEncoding) - if (err != nil) { return nil, err } - - neuralNetworkBytes := buffer.Bytes() - - return neuralNetworkBytes, nil -} - -func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork, error){ - - if (inputNeuralNetwork == nil){ - return NeuralNetwork{}, errors.New("DecodeBytesToNeuralNetworkObject called with nil inputNeuralNetwork.") - } - - buffer := bytes.NewBuffer(inputNeuralNetwork) - - decoder := gob.NewDecoder(buffer) - - var newNeuralNetworkForEncoding neuralNetworkForEncoding - - err := decoder.Decode(&newNeuralNetworkForEncoding) - if (err != nil){ return NeuralNetwork{}, err } - - weights1 := newNeuralNetworkForEncoding.Weights1 - weights2 := newNeuralNetworkForEncoding.Weights2 - weights3 := newNeuralNetworkForEncoding.Weights3 - - weights1Rows := newNeuralNetworkForEncoding.Weights1Rows - weights1Columns := newNeuralNetworkForEncoding.Weights1Columns - weights2Rows := newNeuralNetworkForEncoding.Weights2Rows - weights2Columns := newNeuralNetworkForEncoding.Weights2Columns - weights3Rows := newNeuralNetworkForEncoding.Weights3Rows - weights3Columns := newNeuralNetworkForEncoding.Weights3Columns - - // This is the graph object we add each layer to - newGraph := gorgonia.NewGraph() - - // A layer is a column of neurons - // Each neuron has an initial value between 0 and 1 - getNewNeuralNetworkLayerWeights := func(layerName string, layerNeuronRows int, layerNeuronColumns int, layerWeightsList []float32)*gorgonia.Node{ - - layerNameObject := gorgonia.WithName(layerName) - - layerBacking := tensor.WithBacking(layerWeightsList) - layerShape := tensor.WithShape(layerNeuronRows, layerNeuronColumns) - layerTensor := tensor.New(layerBacking, layerShape) - - layerValueObject := gorgonia.WithValue(layerTensor) - - layerObject := gorgonia.NewMatrix(newGraph, tensor.Float32, layerNameObject, 
layerValueObject) - - return layerObject - } - - layer1 := getNewNeuralNetworkLayerWeights("Weights1", weights1Rows, weights1Columns, weights1) - layer2 := getNewNeuralNetworkLayerWeights("Weights2", weights2Rows, weights2Columns, weights2) - layer3 := getNewNeuralNetworkLayerWeights("Weights3", weights3Rows, weights3Columns, weights3) - - newNeuralNetworkObject := NeuralNetwork{ - - graph: newGraph, - - weights1: layer1, - weights2: layer2, - weights3: layer3, - } - - return newNeuralNetworkObject, nil -} - -// This map is used to store information about how accurate genetic prediction models are for discrete traits -// Map Structure: Discrete Trait Outcome Info -> Discrete Trait Prediction Accuracy Info -type DiscreteTraitPredictionAccuracyInfoMap map[DiscreteTraitOutcomeInfo]DiscreteTraitPredictionAccuracyInfo - -type DiscreteTraitOutcomeInfo struct{ - - // This is the outcome which was predicted - // Example: "Blue" - OutcomeName string - - // This is a value between 0-100 which describes the percentage of the loci which were tested for the input for the prediction - PercentageOfLociTested int - - // This is a value between 0-100 which describes the percentage of the tested loci which were phased for the input for the prediction - PercentageOfPhasedLoci int -} - -type DiscreteTraitPredictionAccuracyInfo struct{ - - // This contains the quantity of examples for the outcome with the specified percentageOfLociTested and percentageOfPhasedLoci - QuantityOfExamples int - - // This contains the quantity of predictions for the outcome with the specified percentageOfLociTested and percentageOfPhasedLoci - // Prediction = our model predicted this outcome - QuantityOfPredictions int - - // This stores the probability (0-100) that our model will accurately predict this outcome for a genome which has - // the specified percentageOfLociTested and percentageOfPhasedLoci - // In other words: What is the probability that if you give Seekia a blue-eyed genome, it will give you 
a correct Blue prediction? - // This value is only accurate is QuantityOfExamples > 0 - ProbabilityOfCorrectGenomePrediction int - - // This stores the probability (0-100) that our model is correct if our model predicts that a genome - // with the specified percentageOfLociTested and percentageOfPhasedLoci has this outcome - // In other words: What is the probability that if Seekia says a genome will have blue eyes, it is correct? - // This value is only accurate is QuantityOfPredictions > 0 - ProbabilityOfCorrectOutcomePrediction int -} - -func EncodeDiscreteTraitPredictionAccuracyInfoMapToBytes(inputMap DiscreteTraitPredictionAccuracyInfoMap)([]byte, error){ - - buffer := new(bytes.Buffer) - - encoder := gob.NewEncoder(buffer) - - err := encoder.Encode(inputMap) - if (err != nil) { return nil, err } - - inputMapBytes := buffer.Bytes() - - return inputMapBytes, nil -} - -func DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap(inputBytes []byte)(DiscreteTraitPredictionAccuracyInfoMap, error){ - - if (inputBytes == nil){ - return nil, errors.New("DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap called with nil inputBytes.") - } - - buffer := bytes.NewBuffer(inputBytes) - - decoder := gob.NewDecoder(buffer) - - var newDiscreteTraitPredictionAccuracyInfoMap DiscreteTraitPredictionAccuracyInfoMap - - err := decoder.Decode(&newDiscreteTraitPredictionAccuracyInfoMap) - if (err != nil){ return nil, err } - - return newDiscreteTraitPredictionAccuracyInfoMap, nil -} - -type NumericAttributePredictionAccuracyInfoMap map[NumericAttributePredictionInfo]NumericAttributePredictionAccuracyRangesMap - -type NumericAttributePredictionInfo struct{ - - // This is a value between 0-100 which describes the percentage of the loci which were tested for the input for the prediction - PercentageOfLociTested int - - // This is a value between 0-100 which describes the percentage of the tested loci which were phased for the input for the prediction - PercentageOfPhasedLoci int -} - -// 
Map Structure: Accuracy Percentage (AP) -> Amount needed to deviate from prediction for the value to be accurate (AP)% of the time -// For example, if the model predicted that someone was 150 centimeters tall, how many centimeters would we have to deviate in both directions -// in order for the true outcome to fall into the range 10% of the time, 20% of the time, 30% of the time, etc... -// Example: -// -90%+: 50 centimeters -// If you travel 50 centimeters in both directions from the prediction, -// the true height value will fall into this range 90% of the time. -// -50%+: 20 centimeters -// -10%+: 10 centimeters -type NumericAttributePredictionAccuracyRangesMap map[int]float64 - - -func EncodeNumericAttributePredictionAccuracyInfoMapToBytes(inputMap NumericAttributePredictionAccuracyInfoMap)([]byte, error){ - - buffer := new(bytes.Buffer) - - encoder := gob.NewEncoder(buffer) - - err := encoder.Encode(inputMap) - if (err != nil) { return nil, err } - - inputMapBytes := buffer.Bytes() - - return inputMapBytes, nil -} - -func DecodeBytesToNumericAttributePredictionAccuracyInfoMap(inputBytes []byte)(NumericAttributePredictionAccuracyInfoMap, error){ - - if (inputBytes == nil){ - return nil, errors.New("DecodeBytesToNumericAttributePredictionAccuracyInfoMap called with nil inputBytes.") - } - - buffer := bytes.NewBuffer(inputBytes) - - decoder := gob.NewDecoder(buffer) - - var newNumericAttributePredictionAccuracyInfoMap NumericAttributePredictionAccuracyInfoMap - - err := decoder.Decode(&newNumericAttributePredictionAccuracyInfoMap) - if (err != nil){ return nil, err } - - return newNumericAttributePredictionAccuracyInfoMap, nil -} - -//Outputs: -// -bool: Neural network model exists for this trait (trait prediction is possible for this trait) -// -bool: Trait prediction is possible for this user (User has at least 1 known trait locus value) -// -string: Predicted trait outcome (Example: "Blue") -// -int: Confidence: Probability (0-100) that the prediction is 
accurate -// -int: Quantity of loci known -// -int: Quantity of phased loci -// -error -func GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap(traitName string, genomeMap map[int64]locusValue.LocusValue)(bool, bool, string, int, int, int, error){ - - traitObject, err := traits.GetTraitObject(traitName) - if (err != nil) { return false, false, "", 0, 0, 0, err } - - traitIsDiscreteOrNumeric := traitObject.DiscreteOrNumeric - if (traitIsDiscreteOrNumeric != "Discrete"){ - return false, false, "", 0, 0, 0, errors.New("GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap called with non-discrete trait: " + traitName) - } - - // This is a map of rsIDs which influence this trait - traitRSIDsList := traitObject.LociList - - if (len(traitRSIDsList) == 0){ - // Neural network trait prediction is not possible for this trait - return false, false, "", 0, 0, 0, nil - } - - predictionModelExists, predictionModelBytes := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName) - if (predictionModelExists == false){ - // Neural network trait prediction is not possible for this trait - return false, false, "", 0, 0, 0, nil - } - - traitRSIDsListCopy := slices.Clone(traitRSIDsList) - slices.Sort(traitRSIDsListCopy) - - neuralNetworkInput, quantityOfLociKnown, quantityOfPhasedLoci, err := createInputNeuralNetworkLayerFromGenomeMap(traitRSIDsListCopy, genomeMap) - if (err != nil) { return false, false, "", 0, 0, 0, err } - - if (quantityOfLociKnown == 0){ - // We can't predict anything about this trait for this genome - return true, false, "", 0, 0, 0, nil - } - - neuralNetworkObject, err := DecodeBytesToNeuralNetworkObject(predictionModelBytes) - if (err != nil) { return false, false, "", 0, 0, 0, err } - - outputLayer, err := GetNeuralNetworkRawPrediction(&neuralNetworkObject, false, neuralNetworkInput) - if (err != nil) { return false, false, "", 0, 0, 0, err } - - predictedOutcomeName, err := GetDiscreteOutcomeNameFromOutputLayer(traitName, false, outputLayer) - if 
(err != nil) { return false, false, "", 0, 0, 0, err } - - modelTraitAccuracyInfoFile, err := geneticPredictionModels.GetPredictionModelDiscreteTraitAccuracyInfoBytes(traitName) - if (err != nil) { return false, false, "", 0, 0, 0, err } - - modelTraitAccuracyInfoMap, err := DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap(modelTraitAccuracyInfoFile) - if (err != nil) { return false, false, "", 0, 0, 0, err } - - // We find the model trait accuracy info object that is the most similar to our predicted outcome - - getPredictionAccuracy := func()int{ - - totalNumberOfTraitLoci := len(traitRSIDsList) - - proportionOfLociTested := float64(quantityOfLociKnown)/float64(totalNumberOfTraitLoci) - percentageOfLociTested := int(proportionOfLociTested * 100) - - proportionOfPhasedLoci := float64(quantityOfPhasedLoci)/float64(totalNumberOfTraitLoci) - percentageOfPhasedLoci := int(proportionOfPhasedLoci * 100) - - // This is a value between 0 and 100 that represents the most likely accuracy probability for this prediction - closestPredictionAccuracy := 0 - - // This is a value that represents the distance our closest prediction accuracy has from the current prediction - // Consider each prediction accuracy value on an (X,Y) coordinate plane - // X = Number of loci tested - // Y = Number of phased loci - closestPredictionAccuracyDistance := float64(0) - - anyOutcomeAccuracyFound := false - - for traitOutcomeInfo, traitPredictionAccuracyInfo := range modelTraitAccuracyInfoMap{ - - outcomeName := traitOutcomeInfo.OutcomeName - if (outcomeName != predictedOutcomeName){ - continue - } - - probabilityOfCorrectOutcomePrediction := traitPredictionAccuracyInfo.ProbabilityOfCorrectOutcomePrediction - - currentPercentageOfLociTested := traitOutcomeInfo.PercentageOfLociTested - currentPercentageOfPhasedLoci := traitOutcomeInfo.PercentageOfPhasedLoci - - // Distance Formula for 2 coordinates (x1, y1) and (x2, y2): - // distance = √((x2 - x1)^2 + (y2 - y1)^2) - - differenceInX := 
float64(currentPercentageOfLociTested - percentageOfLociTested) - differenceInY := float64(currentPercentageOfPhasedLoci - percentageOfPhasedLoci) - - distance := math.Sqrt(math.Pow(differenceInX, 2) + math.Pow(differenceInY, 2)) - - if (distance == 0){ - // We found the exact prediction accuracy - return probabilityOfCorrectOutcomePrediction - } - - if (anyOutcomeAccuracyFound == false){ - closestPredictionAccuracyDistance = distance - closestPredictionAccuracy = probabilityOfCorrectOutcomePrediction - anyOutcomeAccuracyFound = true - continue - } else { - if (distance < closestPredictionAccuracyDistance){ - closestPredictionAccuracyDistance = distance - closestPredictionAccuracy = probabilityOfCorrectOutcomePrediction - } - } - } - - if (anyOutcomeAccuracyFound == false){ - // This means that our model has never actually predicted this outcome - // This shouldn't happen unless our model is really bad, or our training set has very few people with this outcome. - // We return a 0% accuracy rating - return 0 - } - - return closestPredictionAccuracy - } - - predictionAccuracy := getPredictionAccuracy() - - return true, true, predictedOutcomeName, predictionAccuracy, quantityOfLociKnown, quantityOfPhasedLoci, nil -} - -// This function is used to predict numeric traits and polygenic disease risk scores -//Outputs: -// -bool: Neural network model exists for this attribute (neural network prediction is possible for this attribute) -// -bool: Attribute prediction is possible for this user (User has at least 1 known attribute locus value) -// -float64: Predicted attribute outcome (Example: Height in centimeters) -// -map[int]float64: Accuracy ranges map -// -Map Structure: Probability prediction is accurate (X) -> Distance from prediction that must be travelled in both directions to -// create a range in which the true value will fall into, X% of the time -// -int: Quantity of loci known -// -int: Quantity of phased loci -// -error -func 
GetNeuralNetworkNumericAttributePredictionFromGenomeMap(attributeName string, attributeLociList []int64, genomeMap map[int64]locusValue.LocusValue)(bool, bool, float64, map[int]float64, int, int, error){ - - predictionModelExists, predictionModelBytes := geneticPredictionModels.GetGeneticPredictionModelBytes(attributeName) - if (predictionModelExists == false){ - // Prediction is not possible for this attribute - return false, false, 0, nil, 0, 0, nil - } - - if (len(attributeLociList) == 0){ - return false, false, 0, nil, 0, 0, errors.New("GetNeuralNetworkNumericAttributePredictionFromGenomeMap called with empty attributeLociList for attribute with an existing neural network.") - } - - attributeLociListCopy := slices.Clone(attributeLociList) - slices.Sort(attributeLociListCopy) - - neuralNetworkInput, quantityOfLociKnown, quantityOfPhasedLoci, err := createInputNeuralNetworkLayerFromGenomeMap(attributeLociListCopy, genomeMap) - if (err != nil) { return false, false, 0, nil, 0, 0, err } - - if (quantityOfLociKnown == 0){ - // We can't predict anything about this attribute for this genome - return true, false, 0, nil, 0, 0, nil - } - - neuralNetworkObject, err := DecodeBytesToNeuralNetworkObject(predictionModelBytes) - if (err != nil) { return false, false, 0, nil, 0, 0, err } - - outputLayer, err := GetNeuralNetworkRawPrediction(&neuralNetworkObject, true, neuralNetworkInput) - if (err != nil) { return false, false, 0, nil, 0, 0, err } - - predictedOutcomeValue, err := GetNumericOutcomeValueFromOutputLayer(attributeName, outputLayer) - if (err != nil) { return false, false, 0, nil, 0, 0, err } - - modelAccuracyInfoFile, err := geneticPredictionModels.GetPredictionModelNumericAttributeAccuracyInfoBytes(attributeName) - if (err != nil) { return false, false, 0, nil, 0, 0, err } - - modelAccuracyInfoMap, err := DecodeBytesToNumericAttributePredictionAccuracyInfoMap(modelAccuracyInfoFile) - if (err != nil) { return false, false, 0, nil, 0, 0, err } - - // We create a 
prediction confidence ranges map for our prediction - - getPredictionConfidenceRangesMap := func()map[int]float64{ - - totalNumberOfAttributeLoci := len(attributeLociListCopy) - - proportionOfLociTested := float64(quantityOfLociKnown)/float64(totalNumberOfAttributeLoci) - percentageOfLociTested := int(proportionOfLociTested * 100) - - proportionOfPhasedLoci := float64(quantityOfPhasedLoci)/float64(totalNumberOfAttributeLoci) - percentageOfPhasedLoci := int(proportionOfPhasedLoci * 100) - - // This is a value between 0 and 100 that represents the most similar confidence ranges map for this prediction - var closestPredictionConfidenceRangesMap map[int]float64 - - // This is a value that represents the distance our closest prediction confidence ranges map has from the current prediction - // Consider each prediction accuracy value on an (X,Y) coordinate plane - // X = Number of loci tested - // Y = Number of phased loci - closestPredictionConfidenceRangesMapDistance := float64(0) - - for attributeOutcomeInfo, attributePredictionConfidenceRangesMap := range modelAccuracyInfoMap{ - - currentPercentageOfLociTested := attributeOutcomeInfo.PercentageOfLociTested - currentPercentageOfPhasedLoci := attributeOutcomeInfo.PercentageOfPhasedLoci - - // Distance Formula for 2 coordinates (x1, y1) and (x2, y2): - // distance = √((x2 - x1)^2 + (y2 - y1)^2) - - differenceInX := float64(currentPercentageOfLociTested - percentageOfLociTested) - differenceInY := float64(currentPercentageOfPhasedLoci - percentageOfPhasedLoci) - - distance := math.Sqrt(math.Pow(differenceInX, 2) + math.Pow(differenceInY, 2)) - - if (distance == 0){ - // We found the exact prediction confidence ranges map - return attributePredictionConfidenceRangesMap - } - - if (closestPredictionConfidenceRangesMap == nil || distance < closestPredictionConfidenceRangesMapDistance){ - closestPredictionConfidenceRangesMapDistance = distance - closestPredictionConfidenceRangesMap = attributePredictionConfidenceRangesMap - 
} - } - - return closestPredictionConfidenceRangesMap - } - - predictionConfidenceRangesMap := getPredictionConfidenceRangesMap() - - return true, true, predictedOutcomeValue, predictionConfidenceRangesMap, quantityOfLociKnown, quantityOfPhasedLoci, nil -} - - //Outputs: // -[]float32: Input layer for neural network // -int: Quantity of known loci // -int: Quantity of phased loci // -error -func createInputNeuralNetworkLayerFromGenomeMap(rsidsList []int64, genomeMap map[int64]locusValue.LocusValue)([]float32, int, int, error){ +func CreateInputNeuralNetworkLayerFromGenomeMap(rsidsList []int64, genomeMap map[int64]locusValue.LocusValue)([]float32, int, int, error){ // In the inputLayer, each locus value is represented by 3 neurons: // 1. LocusExists/LocusIsPhased @@ -1306,7 +809,7 @@ func CreateGeneticPredictionTrainingData_OpenSNP( return true, trainingDataList, nil } -func GetNewUntrainedNeuralNetworkObject(attributeName string)(*NeuralNetwork, error){ +func GetNewUntrainedNeuralNetworkObject(attributeName string)(*geneticPredictionModels.NeuralNetwork, error){ layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, err := getNeuralNetworkLayerSizes(attributeName) if (err != nil) { return nil, err } @@ -1359,31 +862,18 @@ func GetNewUntrainedNeuralNetworkObject(attributeName string)(*NeuralNetwork, er layer2 := getNewNeuralNetworkLayerWeights("Weights2", layer2NeuronCount, layer3NeuronCount) layer3 := getNewNeuralNetworkLayerWeights("Weights3", layer3NeuronCount, layer4NeuronCount) - newNeuralNetworkObject := NeuralNetwork{ + newNeuralNetworkObject := geneticPredictionModels.NeuralNetwork{ - graph: newGraph, + Graph: newGraph, - weights1: layer1, - weights2: layer2, - weights3: layer3, + Weights1: layer1, + Weights2: layer2, + Weights3: layer3, } return &newNeuralNetworkObject, nil } -// This function returns the weights of the neural network -// We need this for training -func (inputNetwork *NeuralNetwork)getLearnables()gorgonia.Nodes{ - - 
weights1 := inputNetwork.weights1 - weights2 := inputNetwork.weights2 - weights3 := inputNetwork.weights3 - - result := gorgonia.Nodes{weights1, weights2, weights3} - - return result -} - // This function will train the neural network // The function is passed a batch of TrainingData examples to train on @@ -1402,12 +892,12 @@ func (inputNetwork *NeuralNetwork)getLearnables()gorgonia.Nodes{ // Outputs: // -bool: Process completed (was not stopped mid-way) // -error -func TrainNeuralNetwork(attributeName string, attributeIsNumeric bool, neuralNetworkObject *NeuralNetwork, getNextTrainingData func()(bool, bool, TrainingData, error))(bool, error){ +func TrainNeuralNetwork(attributeName string, attributeIsNumeric bool, neuralNetworkObject *geneticPredictionModels.NeuralNetwork, getNextTrainingData func()(bool, bool, TrainingData, error))(bool, error){ layer1NeuronCount, _, _, layer4NeuronCount, err := getNeuralNetworkLayerSizes(attributeName) if (err != nil) { return false, err } - neuralNetworkGraph := neuralNetworkObject.graph + neuralNetworkGraph := neuralNetworkObject.Graph // We first create the input and output nodes // They don't have any values yet. 
@@ -1424,11 +914,11 @@ func TrainNeuralNetwork(attributeName string, attributeIsNumeric bool, neuralNet gorgonia.WithShape(1, layer4NeuronCount), ) - err = neuralNetworkObject.buildNeuralNetwork(trainingDataInputNode, attributeIsNumeric) + err = neuralNetworkObject.BuildNeuralNetwork(trainingDataInputNode, attributeIsNumeric) if (err != nil) { return false, err } // This computes the loss (how accurate was our prediction) - losses, err := gorgonia.Sub(trainingDataExpectedOutputNode, neuralNetworkObject.prediction) + losses, err := gorgonia.Sub(trainingDataExpectedOutputNode, neuralNetworkObject.Prediction) if (err != nil) { return false, err } squareOfLosses, err := gorgonia.Square(losses) @@ -1438,7 +928,7 @@ func TrainNeuralNetwork(attributeName string, attributeIsNumeric bool, neuralNet cost, err := gorgonia.Mean(squareOfLosses) if (err != nil) { return false, err } - neuralNetworkLearnables := neuralNetworkObject.getLearnables() + neuralNetworkLearnables := neuralNetworkObject.GetLearnables() // Grad takes a scalar cost node and a list of with-regards-to, and returns the gradient _, err = gorgonia.Grad(cost, neuralNetworkLearnables...) 
@@ -1515,9 +1005,9 @@ func TrainNeuralNetwork(attributeName string, attributeIsNumeric bool, neuralNet // Outputs: // -[]float32: Output neurons // -error -func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, attributeIsNumeric bool, inputLayer []float32)([]float32, error){ +func GetNeuralNetworkRawPrediction(inputNeuralNetwork *geneticPredictionModels.NeuralNetwork, attributeIsNumeric bool, inputLayer []float32)([]float32, error){ - neuralNetworkGraph := inputNeuralNetwork.graph + neuralNetworkGraph := inputNeuralNetwork.Graph numberOfInputNeurons := len(inputLayer) @@ -1539,12 +1029,12 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, attributeI if (err != nil) { return nil, err } - err = inputNeuralNetwork.buildNeuralNetwork(inputNode, attributeIsNumeric) + err = inputNeuralNetwork.BuildNeuralNetwork(inputNode, attributeIsNumeric) if (err != nil){ return nil, err } // Now we create a virtual machine to compute the prediction - neuralNetworkLearnables := inputNeuralNetwork.getLearnables() + neuralNetworkLearnables := inputNeuralNetwork.GetLearnables() bindDualValues := gorgonia.BindDualValues(neuralNetworkLearnables...) 
@@ -1553,7 +1043,7 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, attributeI err = virtualMachine.RunAll() if (err != nil) { return nil, err } - prediction := inputNeuralNetwork.prediction + prediction := inputNeuralNetwork.Prediction predictionValues := prediction.Value().Data().([]float32) @@ -1561,71 +1051,4 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, attributeI } -// This function will take a neural network and input layer and build the network to be able to compute a prediction -// We need to run a virtual machine after calling this function in order for the prediction to be generated -func (inputNetwork *NeuralNetwork)buildNeuralNetwork(inputLayer *gorgonia.Node, predictionIsNumeric bool)error{ - - // We copy node pointer (says to do this in a resource i'm reading) - - inputLayerCopy := inputLayer - - // We multiply weights at each layer and perform ReLU (Rectification) after each multiplication - - weights1 := inputNetwork.weights1 - - layer1Product, err := gorgonia.Mul(inputLayerCopy, weights1) - if (err != nil) { - return errors.New("Layer 1 multiplication failed: " + err.Error()) - } - - layer1ProductRectified, err := gorgonia.Rectify(layer1Product) - if (err != nil){ - return errors.New("Layer 1 Rectify failed: " + err.Error()) - } - - weights2 := inputNetwork.weights2 - - layer2Product, err := gorgonia.Mul(layer1ProductRectified, weights2) - if (err != nil) { - return errors.New("Layer 2 multiplication failed: " + err.Error()) - } - - layer2ProductRectified, err := gorgonia.Rectify(layer2Product) - if (err != nil){ - return errors.New("Layer 2 Rectify failed: " + err.Error()) - } - - weights3 := inputNetwork.weights3 - - layer3Product, err := gorgonia.Mul(layer2ProductRectified, weights3) - if (err != nil) { - return errors.New("Layer 3 multiplication failed: " + err.Error()) - } - - if (predictionIsNumeric == false){ - - // We SoftMax the output to get the prediction - - prediction, err := 
gorgonia.SoftMax(layer3Product) - if (err != nil) { - return errors.New("SoftMax failed: " + err.Error()) - } - - inputNetwork.prediction = prediction - - } else { - - // We Sigmoid the output to get the prediction - - prediction, err := gorgonia.Sigmoid(layer3Product) - if (err != nil) { - return errors.New("Sigmoid failed: " + err.Error()) - } - - inputNetwork.prediction = prediction - } - - return nil -} - diff --git a/internal/genetics/geneticPrediction/geneticPrediction_test.go b/internal/genetics/geneticPrediction/geneticPrediction_test.go index faf7757..c3b753c 100644 --- a/internal/genetics/geneticPrediction/geneticPrediction_test.go +++ b/internal/genetics/geneticPrediction/geneticPrediction_test.go @@ -2,6 +2,7 @@ package geneticPrediction_test import "seekia/internal/genetics/geneticPrediction" +import "seekia/internal/genetics/geneticPredictionModels" import "testing" @@ -14,12 +15,12 @@ func TestEncodeNeuralNetwork(t *testing.T){ t.Fatalf("GetNewUntrainedNeuralNetworkObject failed: " + err.Error()) } - neuralNetworkBytes, err := geneticPrediction.EncodeNeuralNetworkObjectToBytes(*neuralNetworkObject) + neuralNetworkBytes, err := geneticPredictionModels.EncodeNeuralNetworkObjectToBytes(*neuralNetworkObject) if (err != nil){ t.Fatalf("EncodeNeuralNetworkObjectToBytes failed: " + err.Error()) } - _, err = geneticPrediction.DecodeBytesToNeuralNetworkObject(neuralNetworkBytes) + _, err = geneticPredictionModels.DecodeBytesToNeuralNetworkObject(neuralNetworkBytes) if (err != nil){ t.Fatalf("DecodeBytesToNeuralNetworkObject failed: " + err.Error()) } diff --git a/internal/genetics/geneticPredictionModels/geneticPredictionModels.go b/internal/genetics/geneticPredictionModels/geneticPredictionModels.go new file mode 100644 index 0000000..0765760 --- /dev/null +++ b/internal/genetics/geneticPredictionModels/geneticPredictionModels.go @@ -0,0 +1,230 @@ + +// geneticPredictionModels provides the data structures and functions to represent, encode, and decode 
genetic prediction models +// Prediction models are used to predict polygenic disease risk scores and trait outcomes + +package geneticPredictionModels + +import "gorgonia.org/gorgonia" +import "gorgonia.org/tensor" + +import "bytes" +import "encoding/gob" +import "errors" + + +type NeuralNetwork struct{ + + // ExprGraph is a data structure for a directed acyclic graph (of expressions). + Graph *gorgonia.ExprGraph + + // These are the weights for each layer of neurons + Weights1 *gorgonia.Node + Weights2 *gorgonia.Node + Weights3 *gorgonia.Node + + // This is the computed prediction + Prediction *gorgonia.Node +} + +// This function returns the weights of the neural network +// We need this for training +func (inputNetwork *NeuralNetwork)GetLearnables()gorgonia.Nodes{ + + weights1 := inputNetwork.Weights1 + weights2 := inputNetwork.Weights2 + weights3 := inputNetwork.Weights3 + + result := gorgonia.Nodes{weights1, weights2, weights3} + + return result +} + +// We use this to store a neural network's weights as a .gob file +type neuralNetworkForEncoding struct{ + + // These are the weights for each layer of neurons + Weights1 []float32 + Weights2 []float32 + Weights3 []float32 + + // These represent the quantity of rows and columns for each weight layer + Weights1Rows int + Weights1Columns int + Weights2Rows int + Weights2Columns int + Weights3Rows int + Weights3Columns int +} + +func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, error){ + + weights1 := inputNeuralNetwork.Weights1 + weights2 := inputNeuralNetwork.Weights2 + weights3 := inputNeuralNetwork.Weights3 + + weights1Slice := weights1.Value().Data().([]float32) + weights2Slice := weights2.Value().Data().([]float32) + weights3Slice := weights3.Value().Data().([]float32) + + weights1Rows := weights1.Shape()[0] + weights1Columns := weights1.Shape()[1] + weights2Rows := weights2.Shape()[0] + weights2Columns := weights2.Shape()[1] + weights3Rows := weights3.Shape()[0] + 
weights3Columns := weights3.Shape()[1] + + newNeuralNetworkForEncoding := neuralNetworkForEncoding{ + Weights1: weights1Slice, + Weights2: weights2Slice, + Weights3: weights3Slice, + + Weights1Rows: weights1Rows, + Weights1Columns: weights1Columns, + Weights2Rows: weights2Rows, + Weights2Columns: weights2Columns, + Weights3Rows: weights3Rows, + Weights3Columns: weights3Columns, + } + + buffer := new(bytes.Buffer) + + encoder := gob.NewEncoder(buffer) + + err := encoder.Encode(newNeuralNetworkForEncoding) + if (err != nil) { return nil, err } + + neuralNetworkBytes := buffer.Bytes() + + return neuralNetworkBytes, nil +} + +func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork, error){ + + if (inputNeuralNetwork == nil){ + return NeuralNetwork{}, errors.New("DecodeBytesToNeuralNetworkObject called with nil inputNeuralNetwork.") + } + + buffer := bytes.NewBuffer(inputNeuralNetwork) + + decoder := gob.NewDecoder(buffer) + + var newNeuralNetworkForEncoding neuralNetworkForEncoding + + err := decoder.Decode(&newNeuralNetworkForEncoding) + if (err != nil){ return NeuralNetwork{}, err } + + weights1 := newNeuralNetworkForEncoding.Weights1 + weights2 := newNeuralNetworkForEncoding.Weights2 + weights3 := newNeuralNetworkForEncoding.Weights3 + + weights1Rows := newNeuralNetworkForEncoding.Weights1Rows + weights1Columns := newNeuralNetworkForEncoding.Weights1Columns + weights2Rows := newNeuralNetworkForEncoding.Weights2Rows + weights2Columns := newNeuralNetworkForEncoding.Weights2Columns + weights3Rows := newNeuralNetworkForEncoding.Weights3Rows + weights3Columns := newNeuralNetworkForEncoding.Weights3Columns + + // This is the graph object we add each layer to + newGraph := gorgonia.NewGraph() + + // A layer is a column of neurons + // Each neuron has an initial value between 0 and 1 + getNewNeuralNetworkLayerWeights := func(layerName string, layerNeuronRows int, layerNeuronColumns int, layerWeightsList []float32)*gorgonia.Node{ + + layerNameObject := 
gorgonia.WithName(layerName) + + layerBacking := tensor.WithBacking(layerWeightsList) + layerShape := tensor.WithShape(layerNeuronRows, layerNeuronColumns) + layerTensor := tensor.New(layerBacking, layerShape) + + layerValueObject := gorgonia.WithValue(layerTensor) + + layerObject := gorgonia.NewMatrix(newGraph, tensor.Float32, layerNameObject, layerValueObject) + + return layerObject + } + + layer1 := getNewNeuralNetworkLayerWeights("Weights1", weights1Rows, weights1Columns, weights1) + layer2 := getNewNeuralNetworkLayerWeights("Weights2", weights2Rows, weights2Columns, weights2) + layer3 := getNewNeuralNetworkLayerWeights("Weights3", weights3Rows, weights3Columns, weights3) + + newNeuralNetworkObject := NeuralNetwork{ + + Graph: newGraph, + + Weights1: layer1, + Weights2: layer2, + Weights3: layer3, + } + + return newNeuralNetworkObject, nil +} + + +// This function will take a neural network and input layer and build the network to be able to compute a prediction +// We need to run a virtual machine after calling this function in order for the prediction to be generated +func (inputNetwork *NeuralNetwork)BuildNeuralNetwork(inputLayer *gorgonia.Node, predictionIsNumeric bool)error{ + + // We copy node pointer (says to do this in a resource i'm reading) + + inputLayerCopy := inputLayer + + // We multiply weights at each layer and perform ReLU (Rectification) after each multiplication + + weights1 := inputNetwork.Weights1 + + layer1Product, err := gorgonia.Mul(inputLayerCopy, weights1) + if (err != nil) { + return errors.New("Layer 1 multiplication failed: " + err.Error()) + } + + layer1ProductRectified, err := gorgonia.Rectify(layer1Product) + if (err != nil){ + return errors.New("Layer 1 Rectify failed: " + err.Error()) + } + + weights2 := inputNetwork.Weights2 + + layer2Product, err := gorgonia.Mul(layer1ProductRectified, weights2) + if (err != nil) { + return errors.New("Layer 2 multiplication failed: " + err.Error()) + } + + layer2ProductRectified, err := 
gorgonia.Rectify(layer2Product) + if (err != nil){ + return errors.New("Layer 2 Rectify failed: " + err.Error()) + } + + weights3 := inputNetwork.Weights3 + + layer3Product, err := gorgonia.Mul(layer2ProductRectified, weights3) + if (err != nil) { + return errors.New("Layer 3 multiplication failed: " + err.Error()) + } + + if (predictionIsNumeric == false){ + + // We SoftMax the output to get the prediction + + prediction, err := gorgonia.SoftMax(layer3Product) + if (err != nil) { + return errors.New("SoftMax failed: " + err.Error()) + } + + inputNetwork.Prediction = prediction + + } else { + + // We Sigmoid the output to get the prediction + + prediction, err := gorgonia.Sigmoid(layer3Product) + if (err != nil) { + return errors.New("Sigmoid failed: " + err.Error()) + } + + inputNetwork.Prediction = prediction + } + + return nil +} + diff --git a/resources/geneticPredictionModels/geneticPredictionModels.go b/resources/geneticPredictionModels/geneticPredictionModels.go deleted file mode 100644 index 7a4edd0..0000000 --- a/resources/geneticPredictionModels/geneticPredictionModels.go +++ /dev/null @@ -1,121 +0,0 @@ -// geneticPredictionModels contains genetic prediction neural network models for predicting genetic traits -// These are .gob encoded files of []float32 weights -// This package also contains prediction accuracy information for each model -// Prediction accuracy models describe information about how accurate the predictions made by the models are -// All of the files in this package are created by the Create Genetic Models utility. 
-// This utility is located in /utilities/createGeneticModels/createGeneticModels.go - -package geneticPredictionModels - -import _ "embed" - -import "errors" - - -//go:embed predictionModels/EyeColorModel.gob -var predictionModel_EyeColor []byte - -//go:embed predictionModels/LactoseToleranceModel.gob -var predictionModel_LactoseTolerance []byte - -//go:embed predictionModels/HeightModel.gob -var predictionModel_Height []byte - -//go:embed predictionModels/AutismModel.gob -var predictionModel_Autism []byte - -//go:embed predictionModels/HomosexualnessModel.gob -var predictionModel_Homosexualness []byte - -//go:embed predictionModels/ObesityModel.gob -var predictionModel_Obesity []byte - - -//Outputs: -// -bool: Model exists -// -[]byte -func GetGeneticPredictionModelBytes(traitName string)(bool, []byte){ - - switch traitName{ - - case "Eye Color":{ - return true, predictionModel_EyeColor - } - case "Lactose Tolerance":{ - return true, predictionModel_LactoseTolerance - } - case "Height":{ - return true, predictionModel_Height - } - case "Autism":{ - return true, predictionModel_Autism - } - case "Homosexualness":{ - return true, predictionModel_Homosexualness - } - case "Obesity":{ - return true, predictionModel_Obesity - } - } - - return false, nil -} - -//go:embed predictionModelAccuracies/EyeColorModelAccuracy.gob -var predictionAccuracy_EyeColor []byte - -//go:embed predictionModelAccuracies/LactoseToleranceModelAccuracy.gob -var predictionAccuracy_LactoseTolerance []byte - - -// The files returned by this function are .gob encoded geneticPrediction.DiscreteTraitPredictionAccuracyInfoMap objects -func GetPredictionModelDiscreteTraitAccuracyInfoBytes(traitName string)([]byte, error){ - - switch traitName{ - case "Eye Color":{ - return predictionAccuracy_EyeColor, nil - } - case "Lactose Tolerance":{ - return predictionAccuracy_LactoseTolerance, nil - } - } - - return nil, errors.New("GetPredictionModelDiscreteTraitAccuracyInfoBytes called with unknown 
traitName: " + traitName) -} - - -//go:embed predictionModelAccuracies/HeightModelAccuracy.gob -var predictionAccuracy_Height []byte - -//go:embed predictionModelAccuracies/AutismModelAccuracy.gob -var predictionAccuracy_Autism []byte - -//go:embed predictionModelAccuracies/HomosexualnessModelAccuracy.gob -var predictionAccuracy_Homosexualness []byte - -//go:embed predictionModelAccuracies/ObesityModelAccuracy.gob -var predictionAccuracy_Obesity []byte - - -// The files returned by this function are .gob encoded geneticPrediction.NumericAttributePredictionAccuracyInfoMap objects -func GetPredictionModelNumericAttributeAccuracyInfoBytes(attributeName string)([]byte, error){ - - switch attributeName{ - case "Height":{ - return predictionAccuracy_Height, nil - } - case "Autism":{ - return predictionAccuracy_Autism, nil - } - case "Homosexualness":{ - return predictionAccuracy_Homosexualness, nil - } - case "Obesity":{ - return predictionAccuracy_Obesity, nil - } - } - - return nil, errors.New("GetPredictionModelNumericAttributeAccuracyInfoBytes called with unknown attributeName: " + attributeName) -} - - diff --git a/resources/geneticPredictionModels/geneticPredictionModels_test.go b/resources/geneticPredictionModels/geneticPredictionModels_test.go deleted file mode 100644 index b969850..0000000 --- a/resources/geneticPredictionModels/geneticPredictionModels_test.go +++ /dev/null @@ -1,63 +0,0 @@ -package geneticPredictionModels_test - -import "seekia/resources/geneticPredictionModels" - -import "testing" - -import "seekia/internal/genetics/geneticPrediction" - - -func TestGeneticPredictionModels(t *testing.T){ - - attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Obesity"} - - for _, attributeName := range attributeNamesList{ - - modelFound, modelBytes := geneticPredictionModels.GetGeneticPredictionModelBytes(attributeName) - if (modelFound == false){ - t.Fatalf("GetGeneticPredictionModelBytes failed to find model for trait: " + 
attributeName) - } - - _, err := geneticPrediction.DecodeBytesToNeuralNetworkObject(modelBytes) - if (err != nil){ - t.Fatalf("DecodeBytesToNeuralNetworkObject failed: " + err.Error()) - } - } -} - - -func TestGeneticPredictionModelAccuracies(t *testing.T){ - - discreteTraitNamesList := []string{"Eye Color", "Lactose Tolerance"} - - for _, traitName := range discreteTraitNamesList{ - - accuracyInfoBytes, err := geneticPredictionModels.GetPredictionModelDiscreteTraitAccuracyInfoBytes(traitName) - if (err != nil){ - t.Fatalf("GetPredictionModelDiscreteTraitAccuracyInfoBytes failed: " + err.Error()) - } - - _, err = geneticPrediction.DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap(accuracyInfoBytes) - if (err != nil){ - t.Fatalf("DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap failed: " + err.Error()) - } - } - - numericAttributeNamesList := []string{"Height", "Autism", "Homosexualness", "Obesity"} - - for _, attributeName := range numericAttributeNamesList{ - - accuracyInfoBytes, err := geneticPredictionModels.GetPredictionModelNumericAttributeAccuracyInfoBytes(attributeName) - if (err != nil){ - t.Fatalf("GetPredictionModelNumericAttributeAccuracyInfoBytes failed: " + err.Error()) - } - - _, err = geneticPrediction.DecodeBytesToNumericAttributePredictionAccuracyInfoMap(accuracyInfoBytes) - if (err != nil){ - t.Fatalf("DecodeBytesToNumericAttributePredictionAccuracyInfoMap failed: " + err.Error()) - } - } -} - - - diff --git a/resources/geneticPredictionModels/predictionModelAccuracies/AutismModelAccuracy.gob b/resources/trainedPredictionModels/predictionModelAccuracies/AutismModelAccuracy.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModelAccuracies/AutismModelAccuracy.gob rename to resources/trainedPredictionModels/predictionModelAccuracies/AutismModelAccuracy.gob diff --git a/resources/geneticPredictionModels/predictionModelAccuracies/EyeColorModelAccuracy.gob 
b/resources/trainedPredictionModels/predictionModelAccuracies/EyeColorModelAccuracy.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModelAccuracies/EyeColorModelAccuracy.gob rename to resources/trainedPredictionModels/predictionModelAccuracies/EyeColorModelAccuracy.gob diff --git a/resources/geneticPredictionModels/predictionModelAccuracies/HeightModelAccuracy.gob b/resources/trainedPredictionModels/predictionModelAccuracies/HeightModelAccuracy.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModelAccuracies/HeightModelAccuracy.gob rename to resources/trainedPredictionModels/predictionModelAccuracies/HeightModelAccuracy.gob diff --git a/resources/geneticPredictionModels/predictionModelAccuracies/HomosexualnessModelAccuracy.gob b/resources/trainedPredictionModels/predictionModelAccuracies/HomosexualnessModelAccuracy.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModelAccuracies/HomosexualnessModelAccuracy.gob rename to resources/trainedPredictionModels/predictionModelAccuracies/HomosexualnessModelAccuracy.gob diff --git a/resources/geneticPredictionModels/predictionModelAccuracies/LactoseToleranceModelAccuracy.gob b/resources/trainedPredictionModels/predictionModelAccuracies/LactoseToleranceModelAccuracy.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModelAccuracies/LactoseToleranceModelAccuracy.gob rename to resources/trainedPredictionModels/predictionModelAccuracies/LactoseToleranceModelAccuracy.gob diff --git a/resources/geneticPredictionModels/predictionModelAccuracies/ObesityModelAccuracy.gob b/resources/trainedPredictionModels/predictionModelAccuracies/ObesityModelAccuracy.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModelAccuracies/ObesityModelAccuracy.gob rename to resources/trainedPredictionModels/predictionModelAccuracies/ObesityModelAccuracy.gob diff --git 
a/resources/geneticPredictionModels/predictionModels/AutismModel.gob b/resources/trainedPredictionModels/predictionModels/AutismModel.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModels/AutismModel.gob rename to resources/trainedPredictionModels/predictionModels/AutismModel.gob diff --git a/resources/geneticPredictionModels/predictionModels/EyeColorModel.gob b/resources/trainedPredictionModels/predictionModels/EyeColorModel.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModels/EyeColorModel.gob rename to resources/trainedPredictionModels/predictionModels/EyeColorModel.gob diff --git a/resources/geneticPredictionModels/predictionModels/HeightModel.gob b/resources/trainedPredictionModels/predictionModels/HeightModel.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModels/HeightModel.gob rename to resources/trainedPredictionModels/predictionModels/HeightModel.gob diff --git a/resources/geneticPredictionModels/predictionModels/HomosexualnessModel.gob b/resources/trainedPredictionModels/predictionModels/HomosexualnessModel.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModels/HomosexualnessModel.gob rename to resources/trainedPredictionModels/predictionModels/HomosexualnessModel.gob diff --git a/resources/geneticPredictionModels/predictionModels/LactoseToleranceModel.gob b/resources/trainedPredictionModels/predictionModels/LactoseToleranceModel.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModels/LactoseToleranceModel.gob rename to resources/trainedPredictionModels/predictionModels/LactoseToleranceModel.gob diff --git a/resources/geneticPredictionModels/predictionModels/ObesityModel.gob b/resources/trainedPredictionModels/predictionModels/ObesityModel.gob similarity index 100% rename from resources/geneticPredictionModels/predictionModels/ObesityModel.gob rename to 
resources/trainedPredictionModels/predictionModels/ObesityModel.gob diff --git a/resources/trainedPredictionModels/trainedPredictionModels.go b/resources/trainedPredictionModels/trainedPredictionModels.go new file mode 100644 index 0000000..bba6893 --- /dev/null +++ b/resources/trainedPredictionModels/trainedPredictionModels.go @@ -0,0 +1,805 @@ +// trainedPredictionModels contains trained prediction neural network models for predicting genetic traits +// These models are stored as .gob encoded files of []float32 weights +// This package also contains prediction accuracy information for each model +// Prediction accuracy models describe information about how accurate the predictions made by the models are +// All of the files in this package are created by the Create Genetic Models utility. +// This utility is located in /utilities/createGeneticModels/createGeneticModels.go + +package trainedPredictionModels + +import "seekia/internal/genetics/geneticPrediction" +import "seekia/internal/genetics/geneticPredictionModels" +import "seekia/internal/genetics/locusValue" + +import _ "embed" + +import "math" +import "bytes" +import "encoding/gob" +import "slices" +import "sync" +import "errors" + + +// These are the trained prediction model files: + +//go:embed predictionModels/EyeColorModel.gob +var predictionModelFile_EyeColor []byte + +//go:embed predictionModels/LactoseToleranceModel.gob +var predictionModelFile_LactoseTolerance []byte + +//go:embed predictionModels/HeightModel.gob +var predictionModelFile_Height []byte + +//go:embed predictionModels/AutismModel.gob +var predictionModelFile_Autism []byte + +//go:embed predictionModels/HomosexualnessModel.gob +var predictionModelFile_Homosexualness []byte + +//go:embed predictionModels/ObesityModel.gob +var predictionModelFile_Obesity []byte + +// These are the trained prediction models +// Each model has a mutex so it will only be used to make 1 prediction at a time + +var predictionModel_EyeColor 
*geneticPredictionModels.NeuralNetwork +var predictionModelMutex_EyeColor sync.Mutex + +var predictionModel_LactoseTolerance *geneticPredictionModels.NeuralNetwork +var predictionModelMutex_LactoseTolerance sync.Mutex + +var predictionModel_Height *geneticPredictionModels.NeuralNetwork +var predictionModelMutex_Height sync.Mutex + +var predictionModel_Autism *geneticPredictionModels.NeuralNetwork +var predictionModelMutex_Autism sync.Mutex + +var predictionModel_Homosexualness *geneticPredictionModels.NeuralNetwork +var predictionModelMutex_Homosexualness sync.Mutex + +var predictionModel_Obesity *geneticPredictionModels.NeuralNetwork +var predictionModelMutex_Obesity sync.Mutex + + +// These are the discrete trait prediction model accuracy files: + +//go:embed predictionModelAccuracies/EyeColorModelAccuracy.gob +var predictionAccuracyFile_EyeColor []byte + +//go:embed predictionModelAccuracies/LactoseToleranceModelAccuracy.gob +var predictionAccuracyFile_LactoseTolerance []byte + + +// These are the discrete trait prediction model accuracy maps + +var predictionAccuracyMap_EyeColor DiscreteTraitPredictionAccuracyInfoMap +var predictionAccuracyMap_LactoseTolerance DiscreteTraitPredictionAccuracyInfoMap + + +// These are the numeric attribute prediction model accuracy files: + +//go:embed predictionModelAccuracies/HeightModelAccuracy.gob +var predictionAccuracyFile_Height []byte + +//go:embed predictionModelAccuracies/AutismModelAccuracy.gob +var predictionAccuracyFile_Autism []byte + +//go:embed predictionModelAccuracies/HomosexualnessModelAccuracy.gob +var predictionAccuracyFile_Homosexualness []byte + +//go:embed predictionModelAccuracies/ObesityModelAccuracy.gob +var predictionAccuracyFile_Obesity []byte + +// These are the numeric attribute prediction model accuracy maps + +var predictionAccuracyMap_Height NumericAttributePredictionAccuracyInfoMap +var predictionAccuracyMap_Autism NumericAttributePredictionAccuracyInfoMap +var 
predictionAccuracyMap_Homosexualness NumericAttributePredictionAccuracyInfoMap +var predictionAccuracyMap_Obesity NumericAttributePredictionAccuracyInfoMap + + +// This function has to be called once upon application startup +// We must also call it before certain tests +func InitializeTrainedPredictionModels()error{ + + // We first initialize the neural networks + + attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Obesity", "Homosexualness"} + + for _, attributeName := range attributeNamesList{ + + getPredictionModelFileBytes := func()([]byte, error){ + + switch attributeName{ + + case "Eye Color":{ + return predictionModelFile_EyeColor, nil + } + case "Lactose Tolerance":{ + return predictionModelFile_LactoseTolerance, nil + } + case "Height":{ + return predictionModelFile_Height, nil + } + case "Autism":{ + return predictionModelFile_Autism, nil + } + case "Obesity":{ + return predictionModelFile_Obesity, nil + } + case "Homosexualness":{ + return predictionModelFile_Homosexualness, nil + } + } + + return nil, errors.New("Trying to initialize genetic prediction model with unknown attributeName: " + attributeName) + } + + predictionModelFileBytes, err := getPredictionModelFileBytes() + if (err != nil) { return err } + + neuralNetworkObject, err := geneticPredictionModels.DecodeBytesToNeuralNetworkObject(predictionModelFileBytes) + if (err != nil) { return err } + + switch attributeName{ + + case "Eye Color":{ + predictionModel_EyeColor = &neuralNetworkObject + continue + } + case "Lactose Tolerance":{ + predictionModel_LactoseTolerance = &neuralNetworkObject + continue + } + case "Height":{ + predictionModel_Height = &neuralNetworkObject + continue + } + case "Autism":{ + predictionModel_Autism = &neuralNetworkObject + continue + } + case "Obesity":{ + predictionModel_Obesity = &neuralNetworkObject + continue + } + case "Homosexualness":{ + predictionModel_Homosexualness = &neuralNetworkObject + continue + } + } + + return 
errors.New("Trying to initialize genetic prediction model with unknown attributeName: " + attributeName) + } + + // Now we initialize the prediction accuracy information + // We start with discrete traits + + discreteTraitNamesList := []string{"Eye Color", "Lactose Tolerance"} + + for _, traitName := range discreteTraitNamesList{ + + getPredictionAccuracyFileBytes := func()([]byte, error){ + + switch traitName{ + case "Eye Color":{ + return predictionAccuracyFile_EyeColor, nil + } + case "Lactose Tolerance":{ + return predictionAccuracyFile_LactoseTolerance, nil + } + } + + return nil, errors.New("Prediction accuracy file not found for discrete trait: " + traitName) + } + + predictionAccuracyFileBytes, err := getPredictionAccuracyFileBytes() + if (err != nil) { return err } + + // We convert the gob encoded file to a map + + discreteTraitPredictionAccuracyInfoMap, err := decodeBytesToDiscreteTraitPredictionAccuracyInfoMap(predictionAccuracyFileBytes) + if (err != nil) { return err } + + // We initialize the global variables + + switch traitName{ + case "Eye Color":{ + predictionAccuracyMap_EyeColor = discreteTraitPredictionAccuracyInfoMap + continue + } + case "Lactose Tolerance":{ + predictionAccuracyMap_LactoseTolerance = discreteTraitPredictionAccuracyInfoMap + continue + } + } + + return errors.New("Unknown discrete trait name: " + traitName) + } + + // Now we process numeric attributes + + numericAttributeNamesList := []string{"Height", "Autism", "Homosexualness", "Obesity"} + + for _, traitName := range numericAttributeNamesList{ + + getPredictionAccuracyFileBytes := func()([]byte, error){ + + switch traitName{ + case "Height":{ + return predictionAccuracyFile_Height, nil + } + case "Autism":{ + return predictionAccuracyFile_Autism, nil + } + case "Homosexualness":{ + return predictionAccuracyFile_Homosexualness, nil + } + case "Obesity":{ + return predictionAccuracyFile_Obesity, nil + } + } + + return nil, errors.New("Prediction accuracy file not found for 
numeric trait: " + traitName) + } + + predictionAccuracyFileBytes, err := getPredictionAccuracyFileBytes() + if (err != nil) { return err } + + // We convert the gob encoded file to a map + + numericTraitPredictionAccuracyInfoMap, err := decodeBytesToNumericAttributePredictionAccuracyInfoMap(predictionAccuracyFileBytes) + if (err != nil) { return err } + + // We initialize the global variables + + switch traitName{ + case "Height":{ + predictionAccuracyMap_Height = numericTraitPredictionAccuracyInfoMap + continue + } + case "Autism":{ + predictionAccuracyMap_Autism = numericTraitPredictionAccuracyInfoMap + continue + } + case "Homosexualness":{ + predictionAccuracyMap_Homosexualness = numericTraitPredictionAccuracyInfoMap + continue + } + case "Obesity":{ + predictionAccuracyMap_Obesity = numericTraitPredictionAccuracyInfoMap + continue + } + } + + return errors.New("Unknown numeric trait name: " + traitName) + } + + return nil +} + +// We use this to check if a neural network exists for an attribute +func CheckIfAttributeNeuralNetworkExists(attributeName string)bool{ + + switch attributeName{ + + case "Eye Color", + "Lactose Tolerance", + "Height", + "Autism", + "Obesity", + "Homosexualness":{ + + return true + } + } + + return false +} + + +//Outputs: +// -bool: Neural network model exists for this trait (trait prediction is possible for this trait) +// -bool: Trait prediction is possible for this user (User has at least 1 known trait locus value) +// -string: Predicted trait outcome (Example: "Blue") +// -int: Confidence: Probability (0-100) that the prediction is accurate +// -int: Quantity of loci known +// -int: Quantity of phased loci +// -error +func GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap(traitName string, traitRSIDsList []int64, genomeMap map[int64]locusValue.LocusValue)(bool, bool, string, int, int, int, error){ + + getPredictionModelObject := func()(bool, *geneticPredictionModels.NeuralNetwork){ + + switch traitName{ + + case "Eye Color":{ 
+ return true, predictionModel_EyeColor + } + case "Lactose Tolerance":{ + return true, predictionModel_LactoseTolerance + } + } + + return false, nil + } + + predictionModelExists, predictionModelObject := getPredictionModelObject() + if (predictionModelExists == false){ + // Neural network trait prediction is not possible for this trait + return false, false, "", 0, 0, 0, nil + } + + if (predictionModelObject == nil){ + return false, false, "", 0, 0, 0, errors.New("GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap called when trained prediction models are not initialized.") + } + + if (len(traitRSIDsList) == 0){ + return false, false, "", 0, 0, 0, errors.New("GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap called with empty traitRSIDsList for trait with a neural network.") + } + + traitRSIDsListCopy := slices.Clone(traitRSIDsList) + slices.Sort(traitRSIDsListCopy) + + neuralNetworkInput, quantityOfLociKnown, quantityOfPhasedLoci, err := geneticPrediction.CreateInputNeuralNetworkLayerFromGenomeMap(traitRSIDsListCopy, genomeMap) + if (err != nil) { return false, false, "", 0, 0, 0, err } + + if (quantityOfLociKnown == 0){ + // We can't predict anything about this trait for this genome + return true, false, "", 0, 0, 0, nil + } + + getPredictionOutcome := func()(string, error){ + + // We lock the mutex for the prediction model + + switch traitName{ + + case "Eye Color":{ + predictionModelMutex_EyeColor.Lock() + defer predictionModelMutex_EyeColor.Unlock() + } + case "Lactose Tolerance":{ + predictionModelMutex_LactoseTolerance.Lock() + defer predictionModelMutex_LactoseTolerance.Unlock() + } + default:{ + return "", errors.New("traitName not found: " + traitName) + } + } + + outputLayer, err := geneticPrediction.GetNeuralNetworkRawPrediction(predictionModelObject, false, neuralNetworkInput) + if (err != nil) { return "", err } + + predictedOutcomeName, err := geneticPrediction.GetDiscreteOutcomeNameFromOutputLayer(traitName, false, outputLayer) + if (err != 
nil) { return "", err } + + return predictedOutcomeName, nil + } + + predictedOutcome, err := getPredictionOutcome() + if (err != nil) { return false, false, "", 0, 0, 0, err } + + modelTraitAccuracyInfoMap, err := GetPredictionModelDiscreteTraitAccuracyInfoMap(traitName) + if (err != nil) { return false, false, "", 0, 0, 0, err } + + // We find the model trait accuracy info object that is the most similar to our predicted outcome + + getPredictionAccuracy := func()int{ + + totalNumberOfTraitLoci := len(traitRSIDsList) + + proportionOfLociTested := float64(quantityOfLociKnown)/float64(totalNumberOfTraitLoci) + percentageOfLociTested := int(proportionOfLociTested * 100) + + proportionOfPhasedLoci := float64(quantityOfPhasedLoci)/float64(totalNumberOfTraitLoci) + percentageOfPhasedLoci := int(proportionOfPhasedLoci * 100) + + // This is a value between 0 and 100 that represents the most likely accuracy probability for this prediction + closestPredictionAccuracy := 0 + + // This is a value that represents the distance our closest prediction accuracy has from the current prediction + // Consider each prediction accuracy value on an (X,Y) coordinate plane + // X = Number of loci tested + // Y = Number of phased loci + closestPredictionAccuracyDistance := float64(0) + + anyOutcomeAccuracyFound := false + + for traitOutcomeInfo, traitPredictionAccuracyInfo := range modelTraitAccuracyInfoMap{ + + outcomeName := traitOutcomeInfo.OutcomeName + if (outcomeName != predictedOutcome){ + continue + } + + probabilityOfCorrectOutcomePrediction := traitPredictionAccuracyInfo.ProbabilityOfCorrectOutcomePrediction + + currentPercentageOfLociTested := traitOutcomeInfo.PercentageOfLociTested + currentPercentageOfPhasedLoci := traitOutcomeInfo.PercentageOfPhasedLoci + + // Distance Formula for 2 coordinates (x1, y1) and (x2, y2): + // distance = √((x2 - x1)^2 + (y2 - y1)^2) + + differenceInX := float64(currentPercentageOfLociTested - percentageOfLociTested) + differenceInY := 
float64(currentPercentageOfPhasedLoci - percentageOfPhasedLoci) + + distance := math.Sqrt(math.Pow(differenceInX, 2) + math.Pow(differenceInY, 2)) + + if (distance == 0){ + // We found the exact prediction accuracy + return probabilityOfCorrectOutcomePrediction + } + + if (anyOutcomeAccuracyFound == false){ + closestPredictionAccuracyDistance = distance + closestPredictionAccuracy = probabilityOfCorrectOutcomePrediction + anyOutcomeAccuracyFound = true + continue + } else { + if (distance < closestPredictionAccuracyDistance){ + closestPredictionAccuracyDistance = distance + closestPredictionAccuracy = probabilityOfCorrectOutcomePrediction + } + } + } + + if (anyOutcomeAccuracyFound == false){ + // This means that our model has never actually predicted this outcome + // This shouldn't happen unless our model is really bad, or our training set has very few people with this outcome. + // We return a 0% accuracy rating + return 0 + } + + return closestPredictionAccuracy + } + + predictionAccuracy := getPredictionAccuracy() + + return true, true, predictedOutcome, predictionAccuracy, quantityOfLociKnown, quantityOfPhasedLoci, nil +} + + +// This function is used to predict numeric traits and polygenic disease risk scores +//Outputs: +// -bool: Neural network model exists for this attribute (neural network prediction is possible for this attribute) +// -bool: Attribute prediction is possible for this user (User has at least 1 known attribute locus value) +// -float64: Predicted attribute outcome (Example: Height in centimeters) +// -map[int]float64: Accuracy ranges map +// -Map Structure: Probability prediction is accurate (X) -> Distance from prediction that must be travelled in both directions to +// create a range in which the true value will fall into, X% of the time +// -int: Quantity of loci known +// -int: Quantity of phased loci +// -error +func GetNeuralNetworkNumericAttributePredictionFromGenomeMap(attributeName string, attributeLociList []int64, genomeMap 
map[int64]locusValue.LocusValue)(bool, bool, float64, map[int]float64, int, int, error){ + + getPredictionModelObject := func()(bool, *geneticPredictionModels.NeuralNetwork){ + + switch attributeName{ + + case "Height":{ + return true, predictionModel_Height + } + case "Autism":{ + return true, predictionModel_Autism + } + case "Obesity":{ + return true, predictionModel_Obesity + } + case "Homosexualness":{ + return true, predictionModel_Homosexualness + } + } + + return false, nil + } + + predictionModelExists, predictionModelObject := getPredictionModelObject() + if (predictionModelExists == false){ + // Neural network trait prediction is not possible for this trait + return false, false, 0, nil, 0, 0, nil + } + + if (predictionModelObject == nil){ + return false, false, 0, nil, 0, 0, errors.New("GetNeuralNetworkNumericAttributePredictionFromGenomeMap called when trained prediction models are not initialized.") + } + + if (len(attributeLociList) == 0){ + return false, false, 0, nil, 0, 0, errors.New("GetNeuralNetworkNumericAttributePredictionFromGenomeMap called with empty attributeLociList for an attribute with a neural network.") + } + + attributeLociListCopy := slices.Clone(attributeLociList) + slices.Sort(attributeLociListCopy) + + neuralNetworkInput, quantityOfLociKnown, quantityOfPhasedLoci, err := geneticPrediction.CreateInputNeuralNetworkLayerFromGenomeMap(attributeLociListCopy, genomeMap) + if (err != nil) { return false, false, 0, nil, 0, 0, err } + + if (quantityOfLociKnown == 0){ + // We can't predict anything about this attribute for this genome + return true, false, 0, nil, 0, 0, nil + } + + getPredictionOutcome := func()(float64, error){ + + // We lock the mutex for the prediction model + + switch attributeName{ + + case "Height":{ + predictionModelMutex_Height.Lock() + defer predictionModelMutex_Height.Unlock() + } + case "Autism":{ + predictionModelMutex_Autism.Lock() + defer predictionModelMutex_Autism.Unlock() + } + case "Obesity":{ + 
predictionModelMutex_Obesity.Lock() + defer predictionModelMutex_Obesity.Unlock() + } + case "Homosexualness":{ + predictionModelMutex_Homosexualness.Lock() + defer predictionModelMutex_Homosexualness.Unlock() + } + default:{ + return 0, errors.New("attributeName not found: " + attributeName) + } + } + + outputLayer, err := geneticPrediction.GetNeuralNetworkRawPrediction(predictionModelObject, true, neuralNetworkInput) + if (err != nil) { return 0, err } + + predictedOutcomeValue, err := geneticPrediction.GetNumericOutcomeValueFromOutputLayer(attributeName, outputLayer) + if (err != nil) { return 0, err } + + return predictedOutcomeValue, nil + } + + predictedOutcome, err := getPredictionOutcome() + if (err != nil) { return false, false, 0, nil, 0, 0, err } + + modelAccuracyInfoMap, err := GetPredictionModelNumericAttributeAccuracyInfoMap(attributeName) + if (err != nil) { return false, false, 0, nil, 0, 0, err } + + // We create a prediction confidence ranges map for our prediction + + getPredictionConfidenceRangesMap := func()map[int]float64{ + + totalNumberOfAttributeLoci := len(attributeLociListCopy) + + proportionOfLociTested := float64(quantityOfLociKnown)/float64(totalNumberOfAttributeLoci) + percentageOfLociTested := int(proportionOfLociTested * 100) + + proportionOfPhasedLoci := float64(quantityOfPhasedLoci)/float64(totalNumberOfAttributeLoci) + percentageOfPhasedLoci := int(proportionOfPhasedLoci * 100) + + // This is the stored confidence ranges map which is the most similar to this prediction (it is nil until a candidate is found) + var closestPredictionConfidenceRangesMap map[int]float64 + + // This is a value that represents the distance our closest prediction confidence ranges map has from the current prediction + // Consider each prediction accuracy value on an (X,Y) coordinate plane + // X = Number of loci tested + // Y = Number of phased loci + closestPredictionConfidenceRangesMapDistance := float64(0) + + for attributeOutcomeInfo, 
attributePredictionConfidenceRangesMap := range modelAccuracyInfoMap{ + + currentPercentageOfLociTested := attributeOutcomeInfo.PercentageOfLociTested + currentPercentageOfPhasedLoci := attributeOutcomeInfo.PercentageOfPhasedLoci + + // Distance Formula for 2 coordinates (x1, y1) and (x2, y2): + // distance = √((x2 - x1)^2 + (y2 - y1)^2) + + differenceInX := float64(currentPercentageOfLociTested - percentageOfLociTested) + differenceInY := float64(currentPercentageOfPhasedLoci - percentageOfPhasedLoci) + + distance := math.Sqrt(math.Pow(differenceInX, 2) + math.Pow(differenceInY, 2)) + + if (distance == 0){ + // We found the exact prediction confidence ranges map + return attributePredictionConfidenceRangesMap + } + + if (closestPredictionConfidenceRangesMap == nil || distance < closestPredictionConfidenceRangesMapDistance){ + closestPredictionConfidenceRangesMapDistance = distance + closestPredictionConfidenceRangesMap = attributePredictionConfidenceRangesMap + } + } + + return closestPredictionConfidenceRangesMap + } + + predictionConfidenceRangesMap := getPredictionConfidenceRangesMap() + + return true, true, predictedOutcome, predictionConfidenceRangesMap, quantityOfLociKnown, quantityOfPhasedLoci, nil +} + + +// This map is used to store information about how accurate genetic prediction models are for discrete traits +// Map Structure: Discrete Trait Outcome Info -> Discrete Trait Prediction Accuracy Info +type DiscreteTraitPredictionAccuracyInfoMap map[DiscreteTraitOutcomeInfo]DiscreteTraitPredictionAccuracyInfo + +type DiscreteTraitOutcomeInfo struct{ + + // This is the outcome which was predicted + // Example: "Blue" + OutcomeName string + + // This is a value between 0-100 which describes the percentage of the loci which were tested for the input for the prediction + PercentageOfLociTested int + + // This is a value between 0-100 which describes the percentage of the tested loci which were phased for the input for the prediction + PercentageOfPhasedLoci 
int +} + +type DiscreteTraitPredictionAccuracyInfo struct{ + + // This contains the quantity of examples for the outcome with the specified percentageOfLociTested and percentageOfPhasedLoci + QuantityOfExamples int + + // This contains the quantity of predictions for the outcome with the specified percentageOfLociTested and percentageOfPhasedLoci + // Prediction = our model predicted this outcome + QuantityOfPredictions int + + // This stores the probability (0-100) that our model will accurately predict this outcome for a genome which has + // the specified percentageOfLociTested and percentageOfPhasedLoci + // In other words: What is the probability that if you give Seekia a blue-eyed genome, it will give you a correct Blue prediction? + // This value is only accurate if QuantityOfExamples > 0 + ProbabilityOfCorrectGenomePrediction int + + // This stores the probability (0-100) that our model is correct if our model predicts that a genome + // with the specified percentageOfLociTested and percentageOfPhasedLoci has this outcome + // In other words: What is the probability that if Seekia says a genome will have blue eyes, it is correct? 
+ // This value is only accurate if QuantityOfPredictions > 0 + ProbabilityOfCorrectOutcomePrediction int +} + +func EncodeDiscreteTraitPredictionAccuracyInfoMapToBytes(inputMap DiscreteTraitPredictionAccuracyInfoMap)([]byte, error){ + + buffer := new(bytes.Buffer) + + encoder := gob.NewEncoder(buffer) + + err := encoder.Encode(inputMap) + if (err != nil) { return nil, err } + + inputMapBytes := buffer.Bytes() + + return inputMapBytes, nil +} + +func decodeBytesToDiscreteTraitPredictionAccuracyInfoMap(inputBytes []byte)(DiscreteTraitPredictionAccuracyInfoMap, error){ + + if (inputBytes == nil){ + return nil, errors.New("DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap called with nil inputBytes.") + } + + buffer := bytes.NewBuffer(inputBytes) + + decoder := gob.NewDecoder(buffer) + + var newDiscreteTraitPredictionAccuracyInfoMap DiscreteTraitPredictionAccuracyInfoMap + + err := decoder.Decode(&newDiscreteTraitPredictionAccuracyInfoMap) + if (err != nil){ return nil, err } + + return newDiscreteTraitPredictionAccuracyInfoMap, nil +} + +type NumericAttributePredictionAccuracyInfoMap map[NumericAttributePredictionInfo]NumericAttributePredictionAccuracyRangesMap + +type NumericAttributePredictionInfo struct{ + + // This is a value between 0-100 which describes the percentage of the loci which were tested for the input for the prediction + PercentageOfLociTested int + + // This is a value between 0-100 which describes the percentage of the tested loci which were phased for the input for the prediction + PercentageOfPhasedLoci int +} + +// Map Structure: Accuracy Percentage (AP) -> Amount needed to deviate from prediction for the value to be accurate (AP)% of the time +// For example, if the model predicted that someone was 150 centimeters tall, how many centimeters would we have to deviate in both directions +// in order for the true outcome to fall into the range 10% of the time, 20% of the time, 30% of the time, etc... 
+// Example: +// -90%+: 50 centimeters +// If you travel 50 centimeters in both directions from the prediction, +// the true height value will fall into this range 90% of the time. +// -50%+: 20 centimeters +// -10%+: 10 centimeters +type NumericAttributePredictionAccuracyRangesMap map[int]float64 + + +func EncodeNumericAttributePredictionAccuracyInfoMapToBytes(inputMap NumericAttributePredictionAccuracyInfoMap)([]byte, error){ + + buffer := new(bytes.Buffer) + + encoder := gob.NewEncoder(buffer) + + err := encoder.Encode(inputMap) + if (err != nil) { return nil, err } + + inputMapBytes := buffer.Bytes() + + return inputMapBytes, nil +} + +func decodeBytesToNumericAttributePredictionAccuracyInfoMap(inputBytes []byte)(NumericAttributePredictionAccuracyInfoMap, error){ + + if (inputBytes == nil){ + return nil, errors.New("DecodeBytesToNumericAttributePredictionAccuracyInfoMap called with nil inputBytes.") + } + + buffer := bytes.NewBuffer(inputBytes) + + decoder := gob.NewDecoder(buffer) + + var newNumericAttributePredictionAccuracyInfoMap NumericAttributePredictionAccuracyInfoMap + + err := decoder.Decode(&newNumericAttributePredictionAccuracyInfoMap) + if (err != nil){ return nil, err } + + return newNumericAttributePredictionAccuracyInfoMap, nil +} + +func GetPredictionModelDiscreteTraitAccuracyInfoMap(traitName string)(DiscreteTraitPredictionAccuracyInfoMap, error){ + + getAccuracyInfoMap := func()(DiscreteTraitPredictionAccuracyInfoMap, error){ + + switch traitName{ + case "Eye Color":{ + return predictionAccuracyMap_EyeColor, nil + } + case "Lactose Tolerance":{ + return predictionAccuracyMap_LactoseTolerance, nil + } + } + + return nil, errors.New("GetPredictionModelDiscreteTraitAccuracyInfoMap called with unknown traitName: " + traitName) + } + + accuracyInfoMap, err := getAccuracyInfoMap() + if (err != nil) { return nil, err } + + if (accuracyInfoMap == nil){ + return nil, errors.New("GetPredictionModelDiscreteTraitAccuracyInfoMap called when map is not 
initialized.") + } + + return accuracyInfoMap, nil +} + + +// This function returns the in-memory NumericAttributePredictionAccuracyInfoMap for the attribute, which InitializeTrainedPredictionModels decodes from the embedded .gob file +func GetPredictionModelNumericAttributeAccuracyInfoMap(attributeName string)(NumericAttributePredictionAccuracyInfoMap, error){ + + getAccuracyInfoMap := func()(NumericAttributePredictionAccuracyInfoMap, error){ + + switch attributeName{ + case "Height":{ + return predictionAccuracyMap_Height, nil + } + case "Autism":{ + return predictionAccuracyMap_Autism, nil + } + case "Homosexualness":{ + return predictionAccuracyMap_Homosexualness, nil + } + case "Obesity":{ + return predictionAccuracyMap_Obesity, nil + } + } + + return nil, errors.New("GetPredictionModelNumericAttributeAccuracyInfoMap called with unknown attributeName: " + attributeName) + } + + accuracyInfoMap, err := getAccuracyInfoMap() + if (err != nil) { return nil, err } + + if (accuracyInfoMap == nil){ + return nil, errors.New("GetPredictionModelNumericAttributeAccuracyInfoMap called when map is not initialized.") + } + + return accuracyInfoMap, nil +} + + diff --git a/resources/trainedPredictionModels/trainedPredictionModels_test.go b/resources/trainedPredictionModels/trainedPredictionModels_test.go new file mode 100644 index 0000000..e84de88 --- /dev/null +++ b/resources/trainedPredictionModels/trainedPredictionModels_test.go @@ -0,0 +1,172 @@ +package trainedPredictionModels_test + +import "seekia/resources/trainedPredictionModels" + +import "testing" + +import "seekia/resources/geneticReferences/polygenicDiseases" +import "seekia/resources/geneticReferences/traits" + +import "seekia/internal/genetics/locusValue" +import "seekia/internal/helpers" + +import "errors" + + +func TestTrainedPredictionModels(t *testing.T){ + + err := polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } + + err = 
traits.InitializeTraitVariables() + if (err != nil) { + t.Fatalf("InitializeTraitVariables failed: " + err.Error()) + } + + err = trainedPredictionModels.InitializeTrainedPredictionModels() + if (err != nil){ + t.Fatalf("InitializeTrainedPredictionModels failed: " + err.Error()) + } + + for i:=0; i < 10; i++{ + + discreteTraitNamesList := []string{"Eye Color", "Lactose Tolerance"} + + for _, traitName := range discreteTraitNamesList{ + + modelExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(traitName) + if (modelExists == false){ + t.Fatalf("Prediction model not found: " + traitName) + } + + traitObject, err := traits.GetTraitObject(traitName) + if (err != nil) { + t.Fatalf("GetTraitObject failed: " + err.Error()) + } + + traitLociList := traitObject.LociList + + testGenomeMap, err := getFakeGenomeMap(traitLociList) + if (err != nil){ + t.Fatalf("getFakeGenomeMap failed: " + err.Error()) + } + + neuralNetworkExists, predictionIsPossible, _, _, _, _, err := trainedPredictionModels.GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap(traitName, traitLociList, testGenomeMap) + if (err != nil){ + t.Fatalf("GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap failed: " + err.Error()) + } + if (neuralNetworkExists == false){ + t.Fatalf("GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap claims that neural network doesn't exist for trait: " + traitName) + } + if (predictionIsPossible == false){ + t.Fatalf("GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap claims that prediction isn't possible.") + } + } + + numericAttributeNamesList := []string{"Height", "Autism", "Obesity", "Homosexualness"} + + for _, attributeName := range numericAttributeNamesList{ + + modelExists := trainedPredictionModels.CheckIfAttributeNeuralNetworkExists(attributeName) + if (modelExists == false){ + t.Fatalf("Prediction model not found: " + attributeName) + } + + getAttributeLociList := func()([]int64, error){ + + switch attributeName{ + + case "Homosexualness", + 
"Height":{ + + traitObject, err := traits.GetTraitObject(attributeName) + if (err != nil) { + t.Fatalf("GetTraitObject failed: " + attributeName) + } + + traitLociList := traitObject.LociList + + return traitLociList, nil + } + case "Obesity", + "Autism":{ + + diseaseObject, err := polygenicDiseases.GetPolygenicDiseaseObject(attributeName) + if (err != nil){ + t.Fatalf("GetPolygenicDiesaseObject failed: " + err.Error()) + } + + diseaseLociList := diseaseObject.LociList + + return diseaseLociList, nil + } + } + + return nil, errors.New("Unknown attributeName: " + attributeName) + } + + attributeLociList, err := getAttributeLociList() + if (err != nil){ + t.Fatalf(err.Error()) + } + + testGenomeMap, err := getFakeGenomeMap(attributeLociList) + if (err != nil){ + t.Fatalf("getFakeGenomeMap failed: " + err.Error()) + } + + neuralNetworkExists, predictionIsPossible, _, _, _, _, err := trainedPredictionModels.GetNeuralNetworkNumericAttributePredictionFromGenomeMap(attributeName, attributeLociList, testGenomeMap) + if (err != nil){ + t.Fatalf("GetNeuralNetworkNumericAttributePredictionFromGenomeMap failed: " + err.Error()) + } + if (neuralNetworkExists == false){ + t.Fatalf("GetNeuralNetworkNumericAttributePredictionFromGenomeMap claims that neural network doesn't exist for attribute: " + attributeName) + } + if (predictionIsPossible == false){ + t.Fatalf("GetNeuralNetworkNumericAttributePredictionFromGenomeMap claims that prediction isn't possible.") + } + } + } +} + + +func getFakeGenomeMap(lociList []int64)(map[int64]locusValue.LocusValue, error){ + + // We create a fake genome map + + testGenomeMap := make(map[int64]locusValue.LocusValue) + + for index, rsID := range lociList{ + + if (index != 0){ + // We always include the first locus + + // We will include approximately 80% of the locations in the genome + includeLocusBool, err := helpers.GetRandomBoolWithProbability(0.8) + if (err != nil){ return nil, err } + if (includeLocusBool == false){ + continue + } + } + + 
locusIsPhasedBool := helpers.GetRandomBool() + + randomAllele1, err := helpers.GetRandomItemFromList([]string{"C", "A", "T", "G", "I", "D"}) + if (err != nil){ return nil, err } + randomAllele2, err := helpers.GetRandomItemFromList([]string{"C", "A", "T", "G", "I", "D"}) + if (err != nil){ return nil, err } + + newLocusValue := locusValue.LocusValue{ + Base1Value: randomAllele1, + Base2Value: randomAllele2, + LocusIsPhased: locusIsPhasedBool, + } + + testGenomeMap[rsID] = newLocusValue + } + + return testGenomeMap, nil +} + diff --git a/utilities/createGeneticModels/createGeneticModels.go b/utilities/createGeneticModels/createGeneticModels.go index a0d32da..8d5c7e9 100644 --- a/utilities/createGeneticModels/createGeneticModels.go +++ b/utilities/createGeneticModels/createGeneticModels.go @@ -3,7 +3,7 @@ // These are neural networks which predict attributes such as eye color and autism from raw genome files // The OpenSNP.org dataset is used, and more datasets will be added in the future. // You must download the dataset and extract it. The instructions are described in the utility. -// The trained models are saved in the /resources/geneticPredictionModels package for use in the Seekia app. +// The trained models are saved in the /resources/trainedPredictionModels package for use in the Seekia app. 
package main @@ -19,12 +19,14 @@ import "fyne.io/fyne/v2/data/binding" import "seekia/resources/geneticReferences/polygenicDiseases" import "seekia/resources/geneticReferences/traits" import "seekia/resources/geneticReferences/locusMetadata" +import "seekia/resources/trainedPredictionModels" import "seekia/internal/encoding" import "seekia/internal/genetics/locusValue" import "seekia/internal/genetics/prepareRawGenomes" import "seekia/internal/genetics/readRawGenomes" import "seekia/internal/genetics/geneticPrediction" +import "seekia/internal/genetics/geneticPredictionModels" import "seekia/internal/globalSettings" import "seekia/internal/helpers" import "seekia/internal/imagery" @@ -1298,7 +1300,7 @@ func setStartAndMonitorTrainModelPage(window fyne.Window, attributeName string, // Network training is complete. // We now save the neural network as a .gob file - neuralNetworkBytes, err := geneticPrediction.EncodeNeuralNetworkObjectToBytes(*neuralNetworkObject) + neuralNetworkBytes, err := geneticPredictionModels.EncodeNeuralNetworkObjectToBytes(*neuralNetworkObject) if (err != nil) { return false, err } attributeNameWithoutWhitespaces := strings.ReplaceAll(attributeName, " ", "") @@ -1466,7 +1468,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p // -bool: Process completed (true == was not stopped mid-way) // -geneticPrediction.DiscreteTraitPredictionAccuracyInfoMap // -error - testModel := func()(bool, geneticPrediction.DiscreteTraitPredictionAccuracyInfoMap, error){ + testModel := func()(bool, trainedPredictionModels.DiscreteTraitPredictionAccuracyInfoMap, error){ type TraitAccuracyStatisticsValue struct{ @@ -1486,7 +1488,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p // We use this map to count up the information about predictions // We use information from this map to construct the final accuracy information map - traitPredictionInfoMap := 
make(map[geneticPrediction.DiscreteTraitOutcomeInfo]TraitAccuracyStatisticsValue) + traitPredictionInfoMap := make(map[trainedPredictionModels.DiscreteTraitOutcomeInfo]TraitAccuracyStatisticsValue) _, testingSetFilepathsList, err := getTrainingAndTestingDataFilepathLists(attributeName) @@ -1505,7 +1507,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p return false, nil, errors.New("TrainedModel not found: " + trainedModelFilepath) } - neuralNetworkObject, err := geneticPrediction.DecodeBytesToNeuralNetworkObject(fileContents) + neuralNetworkObject, err := geneticPredictionModels.DecodeBytesToNeuralNetworkObject(fileContents) if (err != nil) { return false, nil, err } numberOfTrainingDatas := len(testingSetFilepathsList) @@ -1572,7 +1574,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p { // We first add the information to the map for the correct outcome - newTraitOutcomeInfo_CorrectOutcome := geneticPrediction.DiscreteTraitOutcomeInfo{ + newTraitOutcomeInfo_CorrectOutcome := trainedPredictionModels.DiscreteTraitOutcomeInfo{ OutcomeName: correctOutcomeName, PercentageOfLociTested: percentageOfLociTested, @@ -1603,7 +1605,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p { // We now add the information to the map for the predicted outcome - newTraitOutcomeInfo_PredictedOutcome := geneticPrediction.DiscreteTraitOutcomeInfo{ + newTraitOutcomeInfo_PredictedOutcome := trainedPredictionModels.DiscreteTraitOutcomeInfo{ OutcomeName: predictedOutcomeName, PercentageOfLociTested: percentageOfLociTested, @@ -1644,7 +1646,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p // Now we construct the TraitAccuracyInfoMap // This map stores the accuracy for each outcome - traitPredictionAccuracyInfoMap := make(map[geneticPrediction.DiscreteTraitOutcomeInfo]geneticPrediction.DiscreteTraitPredictionAccuracyInfo) + traitPredictionAccuracyInfoMap 
:= make(map[trainedPredictionModels.DiscreteTraitOutcomeInfo]trainedPredictionModels.DiscreteTraitPredictionAccuracyInfo) for traitPredictionInfo, value := range traitPredictionInfoMap{ @@ -1661,7 +1663,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p return false, nil, errors.New("traitPredictionInfoMap contains quantityOfCorrectOutcomePredictions > quantityOfPredictions") } - newTraitPredictionAccuracyInfo := geneticPrediction.DiscreteTraitPredictionAccuracyInfo{ + newTraitPredictionAccuracyInfo := trainedPredictionModels.DiscreteTraitPredictionAccuracyInfo{ QuantityOfExamples: quantityOfExamples, QuantityOfPredictions: quantityOfPredictions, } @@ -1689,7 +1691,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p // We save the info map as a file in the ModelAccuracies folder - fileBytes, err := geneticPrediction.EncodeDiscreteTraitPredictionAccuracyInfoMapToBytes(traitPredictionAccuracyInfoMap) + fileBytes, err := trainedPredictionModels.EncodeDiscreteTraitPredictionAccuracyInfoMapToBytes(traitPredictionAccuracyInfoMap) if (err != nil) { return false, nil, err } _, err = localFilesystem.CreateFolder("./ModelAccuracies") @@ -1732,12 +1734,12 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p // -bool: Process completed (true == was not stopped mid-way) // -geneticPrediction.NumericAttributePredictionAccuracyInfoMap // -error - testModel := func()(bool, geneticPrediction.NumericAttributePredictionAccuracyInfoMap, error){ + testModel := func()(bool, trainedPredictionModels.NumericAttributePredictionAccuracyInfoMap, error){ // We use this map to count up the information about predictions // We use information from this map to construct the final accuracy information map // Map Structure: NumericAttributePredictionInfo -> []float64 (List of distances for each prediction) - attributePredictionInfoMap := 
make(map[geneticPrediction.NumericAttributePredictionInfo][]float64) + attributePredictionInfoMap := make(map[trainedPredictionModels.NumericAttributePredictionInfo][]float64) _, testingSetFilepathsList, err := getTrainingAndTestingDataFilepathLists(attributeName) if (err != nil) { return false, nil, err } @@ -1755,7 +1757,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p return false, nil, errors.New("TrainedModel not found: " + trainedModelFilepath) } - neuralNetworkObject, err := geneticPrediction.DecodeBytesToNeuralNetworkObject(fileContents) + neuralNetworkObject, err := geneticPredictionModels.DecodeBytesToNeuralNetworkObject(fileContents) if (err != nil) { return false, nil, err } numberOfTrainingDatas := len(testingSetFilepathsList) @@ -1812,7 +1814,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p proportionOfPhasedLoci := float64(numberOfKnownAndPhasedLoci)/float64(numberOfKnownLoci) percentageOfPhasedLoci := int(100*proportionOfPhasedLoci) - newNumericAttributePredictionInfo := geneticPrediction.NumericAttributePredictionInfo{ + newNumericAttributePredictionInfo := trainedPredictionModels.NumericAttributePredictionInfo{ PercentageOfLociTested: percentageOfLociTested, PercentageOfPhasedLoci: percentageOfPhasedLoci, } @@ -1840,7 +1842,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p // Now we construct the AttributeAccuracyInfoMap // This map stores the accuracy for each QuantityOfKnownLoci/QuantityOfPhasedLoci - attributePredictionAccuracyInfoMap := make(map[geneticPrediction.NumericAttributePredictionInfo]geneticPrediction.NumericAttributePredictionAccuracyRangesMap) + attributePredictionAccuracyInfoMap := make(map[trainedPredictionModels.NumericAttributePredictionInfo]trainedPredictionModels.NumericAttributePredictionAccuracyRangesMap) for attributePredictionInfo, predictionDistancesList := range attributePredictionInfoMap{ @@ -1893,7 +1895,7 @@ 
func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p // We save the info map as a file in the ModelAccuracies folder - fileBytes, err := geneticPrediction.EncodeNumericAttributePredictionAccuracyInfoMapToBytes(attributePredictionAccuracyInfoMap) + fileBytes, err := trainedPredictionModels.EncodeNumericAttributePredictionAccuracyInfoMapToBytes(attributePredictionAccuracyInfoMap) if (err != nil) { return false, nil, err } _, err = localFilesystem.CreateFolder("./ModelAccuracies") @@ -1928,7 +1930,7 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p } // This is a page to view the details of testing for a specific trait's model -func setViewModelTestingDiscreteTraitResultsPage(window fyne.Window, traitName string, traitAccuracyInfoMap geneticPrediction.DiscreteTraitPredictionAccuracyInfoMap, exitPage func()){ +func setViewModelTestingDiscreteTraitResultsPage(window fyne.Window, traitName string, traitAccuracyInfoMap trainedPredictionModels.DiscreteTraitPredictionAccuracyInfoMap, exitPage func()){ title := getBoldLabelCentered("Discrete Trait Prediction Accuracy Details") @@ -2093,7 +2095,7 @@ func setViewModelTestingDiscreteTraitResultsPage(window fyne.Window, traitName s // This is a page to view the details of testing for a numeric attribute's model -func setViewModelTestingNumericAttributeResultsPage(window fyne.Window, attributeName string, attributeAccuracyInfoMap geneticPrediction.NumericAttributePredictionAccuracyInfoMap, exitPage func()){ +func setViewModelTestingNumericAttributeResultsPage(window fyne.Window, attributeName string, attributeAccuracyInfoMap trainedPredictionModels.NumericAttributePredictionAccuracyInfoMap, exitPage func()){ title := getBoldLabelCentered("Numeric Attribute Prediction Accuracy Details")