diff --git a/Changelog.md b/Changelog.md index fd2f041..3647f5c 100644 --- a/Changelog.md +++ b/Changelog.md @@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log. ## Unversioned Changes +* Improved the Create Genetic Models utility and neural network training code. Models are now able to predict traits with some accuracy. - *Simon Sarasova* * Improved ReadMe.md. - *Simon Sarasova* * Improved Seekia's slogan and Whitepaper.md. - *Simon Sarasova* * Added an Estimated Time Remaining label to 2 processes within the Create Genetic Models utility. - *Simon Sarasova* diff --git a/Contributors.md b/Contributors.md index 02024f7..1c15447 100644 --- a/Contributors.md +++ b/Contributors.md @@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th Name | Date Of First Commit | Number Of Commits --- | --- | --- -Simon Sarasova | June 13, 2023 | 264 \ No newline at end of file +Simon Sarasova | June 13, 2023 | 265 \ No newline at end of file diff --git a/internal/genetics/geneticPrediction/geneticPrediction.go b/internal/genetics/geneticPrediction/geneticPrediction.go index 738ab5d..d167762 100644 --- a/internal/genetics/geneticPrediction/geneticPrediction.go +++ b/internal/genetics/geneticPrediction/geneticPrediction.go @@ -20,22 +20,23 @@ import "gorgonia.org/gorgonia" import "gorgonia.org/tensor" import mathRand "math/rand/v2" +import "math" import "bytes" import "encoding/gob" import "slices" import "errors" +//import "log" type NeuralNetwork struct{ // ExprGraph is a data structure for a directed acyclic graph (of expressions). - graph *gorgonia.ExprGraph + graph *gorgonia.ExprGraph // These are the weights for each layer of neurons weights1 *gorgonia.Node weights2 *gorgonia.Node weights3 *gorgonia.Node - weights4 *gorgonia.Node // This is the computed prediction prediction *gorgonia.Node @@ -97,7 +98,6 @@ type neuralNetworkForEncoding struct{ Weights1 []float32 Weights2 []float32 Weights3 []float32 - Weights4 []float32 Weights1Rows int Weights1Columns int @@ -105,8 +105,6 @@ type neuralNetworkForEncoding struct{ Weights2Columns int Weights3Rows int Weights3Columns int - Weights4Rows int - Weights4Columns int } func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, error){ @@ -114,12 +112,10 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, weights1 := inputNeuralNetwork.weights1 weights2 := inputNeuralNetwork.weights2 weights3 := inputNeuralNetwork.weights3 - weights4 := inputNeuralNetwork.weights4 weights1Slice := weights1.Value().Data().([]float32) weights2Slice := weights2.Value().Data().([]float32) weights3Slice := weights3.Value().Data().([]float32) - weights4Slice := weights4.Value().Data().([]float32) weights1Rows := weights1.Shape()[0] weights1Columns := weights1.Shape()[1] @@ -127,14 +123,11 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, weights2Columns := weights2.Shape()[1] weights3Rows := weights3.Shape()[0] weights3Columns := weights3.Shape()[1] - weights4Rows := weights4.Shape()[0] - weights4Columns := weights4.Shape()[1] newNeuralNetworkForEncoding := neuralNetworkForEncoding{ Weights1: weights1Slice, Weights2: weights2Slice, Weights3: weights3Slice, - Weights4: weights4Slice, Weights1Rows: weights1Rows, Weights1Columns: weights1Columns, @@ -142,8 +135,6 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, Weights2Columns: weights2Columns, Weights3Rows: weights3Rows, Weights3Columns: 
weights3Columns,
-		Weights4Rows: weights4Rows,
-		Weights4Columns: weights4Columns,
 	}
 
 	buffer := new(bytes.Buffer)
@@ -176,7 +167,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
 	weights1 := newNeuralNetworkForEncoding.Weights1
 	weights2 := newNeuralNetworkForEncoding.Weights2
 	weights3 := newNeuralNetworkForEncoding.Weights3
-	weights4 := newNeuralNetworkForEncoding.Weights4
 
 	weights1Rows := newNeuralNetworkForEncoding.Weights1Rows
 	weights1Columns := newNeuralNetworkForEncoding.Weights1Columns
@@ -184,8 +174,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
 	weights2Columns := newNeuralNetworkForEncoding.Weights2Columns
 	weights3Rows := newNeuralNetworkForEncoding.Weights3Rows
 	weights3Columns := newNeuralNetworkForEncoding.Weights3Columns
-	weights4Rows := newNeuralNetworkForEncoding.Weights4Rows
-	weights4Columns := newNeuralNetworkForEncoding.Weights4Columns
 
 	// This is the graph object we add each layer to
 	newGraph := gorgonia.NewGraph()
@@ -210,7 +198,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
 	layer1 := getNewNeuralNetworkLayerWeights("Weights1", weights1Rows, weights1Columns, weights1)
 	layer2 := getNewNeuralNetworkLayerWeights("Weights2", weights2Rows, weights2Columns, weights2)
 	layer3 := getNewNeuralNetworkLayerWeights("Weights3", weights3Rows, weights3Columns, weights3)
-	layer4 := getNewNeuralNetworkLayerWeights("Weights4", weights4Rows, weights4Columns, weights4)
 
 	newNeuralNetworkObject := NeuralNetwork{
@@ -219,57 +206,204 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
 		weights1: layer1,
 		weights2: layer2,
 		weights3: layer3,
-		weights4: layer4,
 	}
 
 	return newNeuralNetworkObject, nil
 }
 
+//Outputs:
+// -int: Number of loci values that are known
+// -int: Number of loci values that are known and phased
+// -int: Number of loci
+// -error
+func GetLociInfoFromInputLayer(inputLayer []float32)(int, int, int, error){
+
+	// Each input layer has 3 neurons for each locus
+	// Each rsID (locus) is represented by 3 neurons: LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value
+	// The LocusExists/LocusIsPhased neuron stores information like so:
+	// -0 = Locus value is unknown
+	// -0.5 = Locus is known, phase is unknown
+	// -1 = Locus is known, phase is known
+	// Each rsID's neurons are concatenated together to form the inputLayer
+
+	inputLayerLength := len(inputLayer)
+
+	if (inputLayerLength%3 != 0){
+		return 0, 0, 0, errors.New("GetLociInfoFromInputLayer called with invalid length input layer: Not evenly divisible by 3.")
+	}
+
+	numberOfLoci := len(inputLayer)/3
+
+	numberOfLociValuesThatAreKnown := 0
+	numberOfLociValuesThatAreKnownAndPhased := 0
+
+	for index, neuronValue := range inputLayer{
+
+		indexRemainder := index%3
+
+		if (indexRemainder == 0){
+
+			if (neuronValue == 0){
+				continue
+			}
+
+			numberOfLociValuesThatAreKnown += 1
+
+			// We use an inequality instead of ==1 because floats are imprecise
+			if (neuronValue > 0.99){
+				numberOfLociValuesThatAreKnownAndPhased += 1
+			}
+		}
+	}
+
+	if (numberOfLociValuesThatAreKnown == 0){
+		return 0, 0, 0, errors.New("GetLociInfoFromInputLayer called with input layer with no known loci values.")
+	}
+
+	return numberOfLociValuesThatAreKnown, numberOfLociValuesThatAreKnownAndPhased, numberOfLoci, nil
+}
+
+// This function returns which outcome is being described from a neural network's final output layer
+// Outputs:
+// -string: Output Name (Example: "Blue")
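+//  (For instance, for the "Eye Color" trait, an output layer of {0.05, 0.05, 0.1, 0.8} describes "Brown", because the fourth neuron is the largest.)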
+// -error +func GetOutcomeNameFromOutputLayer(traitName string, verifyOutputLayer bool, outputLayer []float32)(string, error){ + + if (verifyOutputLayer == true){ + + // We make sure all neurons sum to 1 + + summedNeurons := float32(0) + + for _, neuronValue := range outputLayer{ + summedNeurons += neuronValue + } + + // We allow a small amount of inaccuracy due to the imprecise nature of floats. + if (summedNeurons > 1.1 || summedNeurons < .99){ + summedNeuronsString := helpers.ConvertFloat32ToString(summedNeurons) + return "", errors.New("GetOutcomeNameFromOutputLayer called with layer containing neuron values which don't sum to 1: " + summedNeuronsString) + } + } + + getBiggestNeuronIndex := func()int{ + + biggestNeuronValue := float32(0) + biggestNeuronIndex := 0 + + for index, neuronValue := range outputLayer{ + + if (index == 0){ + biggestNeuronValue = neuronValue + } else { + + if (neuronValue > biggestNeuronValue){ + biggestNeuronValue = neuronValue + biggestNeuronIndex = index + } + } + } + + return biggestNeuronIndex + } + + biggestNeuronIndex := getBiggestNeuronIndex() + + switch traitName{ + + case "Eye Color":{ + + if (len(outputLayer) != 4){ + return "", errors.New("GetOutcomeNameFromOutputLayer called with invalid length output layer.") + } + + switch biggestNeuronIndex{ + case 0:{ + return "Blue", nil + } + case 1:{ + return "Green", nil + } + case 2:{ + return "Hazel", nil + } + case 3:{ + return "Brown", nil + } + } + } + case "Lactose Tolerance":{ + + if (len(outputLayer) != 2){ + return "", errors.New("GetOutcomeNameFromOutputLayer called with invalid length output layer.") + } + + switch biggestNeuronIndex{ + case 0:{ + return "Tolerant", nil + } + case 1:{ + return "Intolerant", nil + } + } + } + } + + return "", errors.New("GetOutcomeNameFromOutputLayer called with unknown traitName: " + traitName) +} + //Outputs: // -int: Layer 1 neuron count (input layer) // -int: Layer 2 neuron count // -int: Layer 3 neuron count -// -int: Layer 4 neuron count -// -int: Layer 5 neuron count (output layer) +// -int: Layer 4 neuron count (output layer) // -error -func getNeuralNetworkLayerSizes(traitName string)(int, int, int, int, int, error){ +func getNeuralNetworkLayerSizes(traitName string)(int, int, int, int, error){ switch traitName{ case "Eye Color":{ - // There are 376 input neurons + // There are 282 input neurons // There are 4 output neurons, each representing a color // There are 4 colors: Blue, Green, Brown, Hazel - return 376, 200, 100, 50, 4, nil + return 282, 100, 50, 4, nil + } + case "Lactose Tolerance":{ + + // There are 6 input neurons + // There are 2 output neurons, each representing a tolerance: Tolerant, Intolerant + return 6, 4, 3, 2, nil } } - return 0, 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown traitName: " + traitName) + return 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown traitName: " + traitName) } //This function converts a genome allele to a neuron to use in a tensor +// A value of 0 means that the allele is unknown func convertAlleleToNeuron(allele string)(float32, error){ switch allele{ case "C":{ - return 0, nil + return 0.16, nil } case "A":{ - return 0.2, nil + return 0.32, nil } case "T":{ - return 0.4, nil + return 0.48, nil } case "G":{ - return 0.6, nil + return 0.64, nil } case "I":{ @@ -295,7 +429,7 @@ func CreateGeneticPredictionTrainingData_OpenSNP( userPhenotypeDataObject readBiobankData.PhenotypeData_OpenSNP, userLocusValuesMap map[int64]locusValue.LocusValue)(bool, []TrainingData, 
error){ - if (traitName != "Eye Color"){ + if (traitName != "Eye Color" && traitName != "Lactose Tolerance"){ return false, nil, errors.New("CreateGeneticPredictionTrainingData_OpenSNP called with unknown traitName: " + traitName) } @@ -315,11 +449,15 @@ func CreateGeneticPredictionTrainingData_OpenSNP( // Each TrainingData holds a variation of the user's genome rsID values // We add many rows with withheld data to improve training data - numberOfInputLayerRows, _, _, _, numberOfOutputLayerRows, err := getNeuralNetworkLayerSizes(traitName) + numberOfInputLayerRows, _, _, numberOfOutputLayerRows, err := getNeuralNetworkLayerSizes(traitName) if (err != nil) { return false, nil, err } - // Each rsID is represented by 4 neurons: LocusExists, LocusIsPhased, Allele1 Value, Allele2 Value - expectedNumberOfInputLayerRows := len(traitRSIDs) * 4 + // Each rsID is represented by 3 neurons: LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value + // The LocusExists/LocusIsPhased neuron stores information like so: + // -0 = Locus value is unknown + // -0.5 = Locus Is known, phase is unknown + // -1 = Locus Is Known, phase is known + expectedNumberOfInputLayerRows := len(traitRSIDs) * 3 if (numberOfInputLayerRows != expectedNumberOfInputLayerRows){ @@ -365,33 +503,52 @@ func CreateGeneticPredictionTrainingData_OpenSNP( // -error getUserTraitValueNeurons := func()(bool, []float32, error){ - if (traitName == "Eye Color"){ + switch traitName{ - userEyeColorIsKnown := userPhenotypeDataObject.EyeColorIsKnown - if (userEyeColorIsKnown == false){ - return false, nil, nil + case "Eye Color":{ + + userEyeColorIsKnown := userPhenotypeDataObject.EyeColorIsKnown + if (userEyeColorIsKnown == false){ + return false, nil, nil + } + + userEyeColor := userPhenotypeDataObject.EyeColor + + if (userEyeColor == "Blue"){ + + return true, []float32{1, 0, 0, 0}, nil + + } else if (userEyeColor == "Green"){ + + return true, []float32{0, 1, 0, 0}, nil + + } else if (userEyeColor == "Hazel"){ + + return true, []float32{0, 0, 1, 0}, nil + + } else if (userEyeColor == "Brown"){ + + return true, []float32{0, 0, 0, 1}, nil + } + + return false, nil, errors.New("Malformed userPhenotypeDataObject: Invalid eyeColor: " + userEyeColor) } + case "Lactose Tolerance":{ - userEyeColor := userPhenotypeDataObject.EyeColor + userLactoseToleranceIsKnown := userPhenotypeDataObject.LactoseToleranceIsKnown + if (userLactoseToleranceIsKnown == false){ + return false, nil, nil + } - if (userEyeColor == "Blue"){ + userLactoseTolerance := userPhenotypeDataObject.LactoseTolerance - return true, []float32{1, 0, 0, 0}, nil + if (userLactoseTolerance == true){ - } else if (userEyeColor == "Green"){ + return true, []float32{1, 0}, nil + } - return true, []float32{0, 1, 0, 0}, nil - - } else if (userEyeColor == "Hazel"){ - - return true, []float32{0, 0, 1, 0}, nil - - } else if (userEyeColor == "Brown"){ - - return true, []float32{0, 0, 0, 1}, nil + return true, []float32{0, 1}, nil } - - return false, nil, errors.New("Malformed userPhenotypeDataObject: Invalid eyeColor: " + userEyeColor) } return false, nil, errors.New("Unknown traitName: " + traitName) @@ -409,6 +566,12 @@ func CreateGeneticPredictionTrainingData_OpenSNP( return false, nil, errors.New("getUserTraitValueNeurons returning invalid length layer slice.") } + // We want the initial training data to be the same for each call of this function that has the same input parameters + // This is a necessary step so our neural network models will be reproducable + // Reproducable means that other people 
can run the code and produce the same models, byte-for-byte + + pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2)) + // We create 110 examples per user. // We randomize allele order whenever phase for the locus is unknown // 50% of the time we randomize allele order even when phase is known to train the model on unphased data @@ -462,33 +625,41 @@ func CreateGeneticPredictionTrainingData_OpenSNP( probabilityOfUsingLoci := getProbabilityOfUsingLoci() - // In the inputLayer, each locus value is represented by 4 neurons: - // 1. LocusExists (Either 0 or 1) - // 2. LocusIsPhased (Either 0 or 1) - // 3. Allele1 Locus Value (Value between 0-1) - // 4. Allele2 Locus Value (Value between 0-1) + // In the inputLayer, each locus value is represented by 3 neurons: + // 1. LocusExists/LocusIsPhased + // -0 = Locus value is unknown + // -0.5 = Locus Is known, phase is unknown + // -1 = Locus Is Known, phase is known + // 2. Allele1 Locus Value (Value between 0-1) + // -0 = Value is unknown + // 3. Allele2 Locus Value (Value between 0-1) + // -0 = Value is unknown - inputLayerLength := len(traitRSIDsList) * 4 + anyLocusExists := false + + inputLayerLength := len(traitRSIDsList) * 3 inputLayer := make([]float32, 0, inputLayerLength) for _, rsID := range traitRSIDsList{ - useLocusBool, err := helpers.GetRandomBoolWithProbability(probabilityOfUsingLoci) - if (err != nil) { return false, nil, err } - if (useLocusBool == false){ + randomFloat := pseudorandomNumberGenerator.Float64() + if (randomFloat > probabilityOfUsingLoci){ + // This if statement has a !probabilityOfUsingLoci chance of being true. // We are skipping this locus - inputLayer = append(inputLayer, 0, 0, 0, 0) + inputLayer = append(inputLayer, 0, 0, 0) continue } userLocusValue, exists := userLocusValuesMap[rsID] if (exists == false){ // This user's locus value is unknown - inputLayer = append(inputLayer, 0, 0, 0, 0) + inputLayer = append(inputLayer, 0, 0, 0) continue } + anyLocusExists = true + getLocusAlleles := func()(string, string){ locusAllele1 := userLocusValue.Base1Value @@ -498,9 +669,11 @@ func CreateGeneticPredictionTrainingData_OpenSNP( return locusAllele1, locusAllele2 } - randomBool := helpers.GetRandomBool() + // We randomize the phase of the locus - if (randomBool == false){ + randomNumber := pseudorandomNumberGenerator.IntN(2) + if (randomNumber == 1){ + // This has a 50% chance of being true. return locusAllele1, locusAllele2 } @@ -514,10 +687,16 @@ func CreateGeneticPredictionTrainingData_OpenSNP( locusAllele2NeuronValue, err := convertAlleleToNeuron(locusAllele2) if (err != nil) { return false, nil, err } - getLocusIsPhasedNeuronValue := func()float32{ + getLocusIsKnownAndPhasedNeuronValue := func()float32{ + + if (locusAllele1 == locusAllele2){ + // Phase of locus must be known. + // Swapping the loci would change nothing. + return 1 + } if (randomizePhaseBool == true){ - return 0 + return 0.5 } locusIsPhased := userLocusValue.LocusIsPhased @@ -525,12 +704,18 @@ func CreateGeneticPredictionTrainingData_OpenSNP( return 1 } - return 0 + return 0.5 } - locusIsPhasedNeuronValue := getLocusIsPhasedNeuronValue() + locusIsKnownAndPhasedNeuronValue := getLocusIsKnownAndPhasedNeuronValue() - inputLayer = append(inputLayer, 1, locusIsPhasedNeuronValue, locusAllele1NeuronValue, locusAllele2NeuronValue) + inputLayer = append(inputLayer, locusIsKnownAndPhasedNeuronValue, locusAllele1NeuronValue, locusAllele2NeuronValue) + } + + if (anyLocusExists == false){ + // We have 0 known loci for this training example. 
+ // We won't add it to the training data. + continue } userTraitValueNeuronsCopy := slices.Clone(userTraitValueNeurons) @@ -548,7 +733,7 @@ func CreateGeneticPredictionTrainingData_OpenSNP( func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error){ - layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, layer5NeuronCount, err := getNeuralNetworkLayerSizes(traitName) + layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, err := getNeuralNetworkLayerSizes(traitName) if (err != nil) { return nil, err } // This is the graph object we add each layer to @@ -572,8 +757,12 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error) for i:=0; i < totalNumberOfNeurons; i++{ - // This returns a pseudo-random number between 0 and 1 - newWeight := pseudorandomNumberGenerator.Float32() + // We initialize the weights with He initialization + // He initialization = (0 +/- sqrt(2/n) where n is the number of nodes in the prior layer) + + // pseudorandomNumberGenerator.Float32() returns a pseudo-random number between 0 and 1 + + newWeight := ((pseudorandomNumberGenerator.Float32()-0.5)*2) * float32(math.Sqrt(float64(2)/float64(layerNeuronRows))) layerInitialWeightsList = append(layerInitialWeightsList, newWeight) } @@ -594,7 +783,6 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error) layer1 := getNewNeuralNetworkLayerWeights("Weights1", layer1NeuronCount, layer2NeuronCount) layer2 := getNewNeuralNetworkLayerWeights("Weights2", layer2NeuronCount, layer3NeuronCount) layer3 := getNewNeuralNetworkLayerWeights("Weights3", layer3NeuronCount, layer4NeuronCount) - layer4 := getNewNeuralNetworkLayerWeights("Weights4", layer4NeuronCount, layer5NeuronCount) newNeuralNetworkObject := NeuralNetwork{ @@ -603,7 +791,6 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error) weights1: layer1, weights2: layer2, weights3: layer3, - weights4: layer4, } return &newNeuralNetworkObject, nil @@ -616,75 +803,68 @@ func (inputNetwork *NeuralNetwork)getLearnables()gorgonia.Nodes{ weights1 := inputNetwork.weights1 weights2 := inputNetwork.weights2 weights3 := inputNetwork.weights3 - weights4 := inputNetwork.weights4 - result := gorgonia.Nodes{weights1, weights2, weights3, weights4} + result := gorgonia.Nodes{weights1, weights2, weights3} return result } // This function will train the neural network -// The function is passed a single TrainingData example to train on -// -// TODO: This function doesn't work -// The weights do not change during training -// I think the layer dimensions are wrong? -// -func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, trainingData TrainingData)error{ +// The function is passed a batch of TrainingData examples to train on +// Inputs: +// -string: Trait Name +// -*NeuralNetwork +// -func()(bool, bool, TrainingData, error): Function to get the next training data. 
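+//  (This function is called repeatedly: training stops early if it reports that the user stopped the run, and finishes once it reports that no more training data exists.)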
+// -Outputs: +// -bool: User stopped the training run +// -bool: Another training data exists +// -TrainingData: The next training data example +// -error +// Outputs: +// -bool: Process completed (was not stopped mid-way) +// -error +func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, getNextTrainingData func()(bool, bool, TrainingData, error))(bool, error){ - layer1NeuronCount, _, _, _, layer5NeuronCount, err := getNeuralNetworkLayerSizes(traitName) - if (err != nil) { return err } + layer1NeuronCount, _, _, layer4NeuronCount, err := getNeuralNetworkLayerSizes(traitName) + if (err != nil) { return false, err } neuralNetworkGraph := neuralNetworkObject.graph - // This inputLayer contains the allele values for this training example - trainingDataInputLayer := trainingData.InputLayer - - // This outputLayer contains the phenotype for this training example (example: Eye color of Blue) - trainingDataOutputLayer := trainingData.OutputLayer - - // We convert our inputTensor and outputTensor to the type *Node - - inputTensorShapeObject := tensor.WithShape(1, layer1NeuronCount) - outputTensorShapeObject := tensor.WithShape(1, layer5NeuronCount) - - inputTensorBacking := tensor.WithBacking(trainingDataInputLayer) - outputTensorBacking := tensor.WithBacking(trainingDataOutputLayer) - - inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking) - outputTensor := tensor.New(outputTensorShapeObject, outputTensorBacking) + // We first create the input and output nodes + // They don't have any values yet. trainingDataInputNode := gorgonia.NewMatrix(neuralNetworkGraph, tensor.Float32, - gorgonia.WithName("input"), + gorgonia.WithName("Input"), gorgonia.WithShape(1, layer1NeuronCount), - gorgonia.WithValue(inputTensor), ) - trainingDataOutputNode := gorgonia.NewMatrix(neuralNetworkGraph, + trainingDataExpectedOutputNode := gorgonia.NewMatrix(neuralNetworkGraph, tensor.Float32, - gorgonia.WithName("expectedOutput"), - gorgonia.WithShape(1, layer5NeuronCount), - gorgonia.WithValue(outputTensor), + gorgonia.WithName("ExpectedOutput"), + gorgonia.WithShape(1, layer4NeuronCount), ) - err = neuralNetworkObject.prepareToComputePrediction(trainingDataInputNode) - if (err != nil) { return err } + err = neuralNetworkObject.buildNeuralNetwork(trainingDataInputNode) + if (err != nil) { return false, err } // This computes the loss (how accurate was our prediction) - losses, err := gorgonia.Sub(trainingDataOutputNode, neuralNetworkObject.prediction) - if (err != nil) { return err } + losses, err := gorgonia.Sub(trainingDataExpectedOutputNode, neuralNetworkObject.prediction) + if (err != nil) { return false, err } - // Cost is an average of the losses - cost, err := gorgonia.Mean(losses) - if (err != nil) { return err } + squareOfLosses, err := gorgonia.Square(losses) + if (err != nil) { return false, err } + + // Cost is an average of the square of losses + cost, err := gorgonia.Mean(squareOfLosses) + if (err != nil) { return false, err } neuralNetworkLearnables := neuralNetworkObject.getLearnables() // Grad takes a scalar cost node and a list of with-regards-to, and returns the gradient _, err = gorgonia.Grad(cost, neuralNetworkLearnables...) - if (err != nil) { return err } + if (err != nil) { return false, err } bindDualValues := gorgonia.BindDualValues(neuralNetworkLearnables...) 
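+	// Worked example of the cost above: if the expected output is {1, 0, 0, 0} and the prediction is {0.7, 0.1, 0.1, 0.1},
+	// then the squared losses are {0.09, 0.01, 0.01, 0.01} and the cost (their mean) is 0.03.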
@@ -692,29 +872,69 @@ func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, tr virtualMachine := gorgonia.NewTapeMachine(neuralNetworkGraph, bindDualValues) // This is the learn rate or step size for the solver. - learningRate := gorgonia.WithLearnRate(.001) + learningRate := gorgonia.WithLearnRate(.01) // This clips the gradient if it gets too crazy - //gradientClip := gorgonia.WithClip(5) +// gradientClip := gorgonia.WithClip(.05) solver := gorgonia.NewVanillaSolver(learningRate) - //solver := gorgonia.NewVanillaSolver(learningRate, gradientClip) +// solver := gorgonia.NewVanillaSolver(learningRate, gradientClip) + defer virtualMachine.Close() - for i:=0; i < 10; i++{ + for { - err = virtualMachine.RunAll() - if (err != nil) { return err } + userStoppedTraining, nextDataExists, trainingDataObject, err := getNextTrainingData() + if (err != nil) { return false, err } + if (userStoppedTraining == true){ + // User manually stopped the training run + return false, nil + } + if (nextDataExists == false){ + // We are done training + break + } - // NodesToValueGrads is a utility function that converts a Nodes to a slice of ValueGrad for the solver - valueGrads := gorgonia.NodesToValueGrads(neuralNetworkLearnables) + // We convert our input training data slices to the type *Dense and assign them to our nodes - err := solver.Step(valueGrads) - if (err != nil) { return err } + // This inputLayer contains the allele values for this training example + trainingDataInputLayer := trainingDataObject.InputLayer - virtualMachine.Reset() + // This outputLayer contains the phenotype for this training example (example: Eye color of Blue) + trainingDataOutputLayer := trainingDataObject.OutputLayer + + inputTensorShapeObject := tensor.WithShape(1, layer1NeuronCount) + outputTensorShapeObject := tensor.WithShape(1, layer4NeuronCount) + + inputTensorBacking := tensor.WithBacking(trainingDataInputLayer) + outputTensorBacking := tensor.WithBacking(trainingDataOutputLayer) + + inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking) + outputTensor := tensor.New(outputTensorShapeObject, outputTensorBacking) + + err = gorgonia.Let(trainingDataInputNode, inputTensor) + if (err != nil) { return false, err } + + err = gorgonia.Let(trainingDataExpectedOutputNode, outputTensor) + if (err != nil) { return false, err } + +// for i:=0; i < 10; i++{ + + err = virtualMachine.RunAll() + if (err != nil) { return false, err } + + // NodesToValueGrads is a utility function that converts a Nodes to a slice of ValueGrad for the solver + valueGrads := gorgonia.NodesToValueGrads(neuralNetworkLearnables) + + err = solver.Step(valueGrads) + if (err != nil) { return false, err } + + virtualMachine.Reset() +// } + +// log.Println(cost.Value()) } - return nil + return true, nil } @@ -726,28 +946,29 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer neuralNetworkGraph := inputNeuralNetwork.graph - // We convert the inputLayer []float32 to a node object - numberOfInputNeurons := len(inputLayer) + inputNode := gorgonia.NewMatrix(neuralNetworkGraph, + tensor.Float32, + gorgonia.WithName("Input"), + gorgonia.WithShape(1, numberOfInputNeurons), + ) + + // We convert the inputLayer []float32 to a tensor *Dense object + inputTensorShapeObject := tensor.WithShape(1, numberOfInputNeurons) inputTensorBacking := tensor.WithBacking(inputLayer) inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking) - inputNode := gorgonia.NewMatrix(neuralNetworkGraph, - tensor.Float32, - 
gorgonia.WithName("input"), - gorgonia.WithShape(1, numberOfInputNeurons), - gorgonia.WithValue(inputTensor), - ) + err := gorgonia.Let(inputNode, inputTensor) + if (err != nil) { return nil, err } - err := inputNeuralNetwork.prepareToComputePrediction(inputNode) + + err = inputNeuralNetwork.buildNeuralNetwork(inputNode) if (err != nil){ return nil, err } - prediction := inputNeuralNetwork.prediction - // Now we create a virtual machine to compute the prediction neuralNetworkLearnables := inputNeuralNetwork.getLearnables() @@ -759,26 +980,25 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer err = virtualMachine.RunAll() if (err != nil) { return nil, err } + prediction := inputNeuralNetwork.prediction + predictionValues := prediction.Value().Data().([]float32) return predictionValues, nil } -// This function will take a neural network and input layer and prepare the network to compute a prediction -// We still need to run a virtual machine after calling this function in order for the prediction to be generated -func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgonia.Node)error{ +// This function will take a neural network and input layer and build the network to be able to compute a prediction +// We need to run a virtual machine after calling this function in order for the prediction to be generated +func (inputNetwork *NeuralNetwork)buildNeuralNetwork(inputLayer *gorgonia.Node)error{ - // We copy pointer (says to do this in a resource i'm reading) + // We copy node pointer (says to do this in a resource i'm reading) inputLayerCopy := inputLayer - // We multiply weights at each layer and perform rectification (ReLU) after each multiplication + // We multiply weights at each layer and perform sigmoid after each multiplication weights1 := inputNetwork.weights1 - weights2 := inputNetwork.weights2 - weights3 := inputNetwork.weights3 - weights4 := inputNetwork.weights4 layer1Product, err := gorgonia.Mul(inputLayerCopy, weights1) if (err != nil) { @@ -787,9 +1007,11 @@ func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgoni layer1ProductRectified, err := gorgonia.Rectify(layer1Product) if (err != nil){ - return errors.New("Layer 1 rectification failed: " + err.Error()) + return errors.New("Layer 1 Rectify failed: " + err.Error()) } + weights2 := inputNetwork.weights2 + layer2Product, err := gorgonia.Mul(layer1ProductRectified, weights2) if (err != nil) { return errors.New("Layer 2 multiplication failed: " + err.Error()) @@ -797,35 +1019,21 @@ func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgoni layer2ProductRectified, err := gorgonia.Rectify(layer2Product) if (err != nil){ - return errors.New("Layer 2 rectification failed: " + err.Error()) + return errors.New("Layer 2 Rectify failed: " + err.Error()) } + weights3 := inputNetwork.weights3 + layer3Product, err := gorgonia.Mul(layer2ProductRectified, weights3) if (err != nil) { return errors.New("Layer 3 multiplication failed: " + err.Error()) } - layer3ProductRectified, err := gorgonia.Rectify(layer3Product) - if (err != nil){ - return errors.New("Layer 3 rectification failed: " + err.Error()) - } + // We SoftMax the output to get the prediction - layer4Product, err := gorgonia.Mul(layer3ProductRectified, weights4) + prediction, err := gorgonia.SoftMax(layer3Product) if (err != nil) { - return errors.New("Layer 4 multiplication failed: " + err.Error()) - } - - layer4ProductRectified, err := gorgonia.Rectify(layer4Product) - if (err != 
nil){ - return errors.New("Layer 4 rectification failed: " + err.Error()) - } - - // We sigmoid the output to get the prediction - //TODO: Use SoftMax instead? - - prediction, err := gorgonia.Sigmoid(layer4ProductRectified) - if (err != nil) { - return errors.New("Sigmoid failed: " + err.Error()) + return errors.New("SoftMax failed: " + err.Error()) } inputNetwork.prediction = prediction diff --git a/internal/genetics/prepareRawGenomes/prepareRawGenomes.go b/internal/genetics/prepareRawGenomes/prepareRawGenomes.go index f32127f..8aa5fbf 100644 --- a/internal/genetics/prepareRawGenomes/prepareRawGenomes.go +++ b/internal/genetics/prepareRawGenomes/prepareRawGenomes.go @@ -120,8 +120,20 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi locusAllele1 := locusBasePairValue.Allele1 locusAllele2 := locusBasePairValue.Allele2 + getLocusIsPhasedBool := func()bool{ + + if (locusAllele1 == locusAllele2){ + // Locus has to be phased, because phase flip does not change value + return true + } + + return genomeIsPhased + } + + locusIsPhased := getLocusIsPhasedBool() + locusValueObject := locusValue.LocusValue{ - LocusIsPhased: genomeIsPhased, + LocusIsPhased: locusIsPhased, Base1Value: locusAllele1, Base2Value: locusAllele2, } @@ -466,9 +478,20 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi // The OnlyExcludeConflicts will only omit when there is a tie // The OnlyIncludeShared requires at least 2 to agree + getLocusIsPhased_OnlyExcludeConflicts := func()bool{ + if (locusBase1 == locusBase2){ + // These kinds of loci are always phased, becauses swapping the alleles changes nothing. + return true + } + + return phaseIsKnown_OnlyExcludeConflicts + } + + locusIsPhased_OnlyExcludeConflicts := getLocusIsPhased_OnlyExcludeConflicts() + onlyExcludeConflictsLocusValue := locusValue.LocusValue{ - LocusIsPhased: phaseIsKnown_OnlyExcludeConflicts, + LocusIsPhased: locusIsPhased_OnlyExcludeConflicts, Base1Value: locusBase1, Base2Value: locusBase2, } @@ -477,8 +500,19 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi if (mostRecordedSortedBasePairCount >= 2){ + getLocusIsPhased_OnlyIncludeShared := func()bool{ + if (locusBase1 == locusBase2){ + // These kinds of loci are always phased, becauses swapping the alleles changes nothing. 
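+			// For example, a homozygous C/C locus reads the same in either allele order, so its phase is effectively known.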
+ return true + } + + return phaseIsKnown_OnlyIncludeShared + } + + locusIsPhased_OnlyIncludeShared := getLocusIsPhased_OnlyIncludeShared() + onlyIncludeSharedLocusValue := locusValue.LocusValue{ - LocusIsPhased: phaseIsKnown_OnlyIncludeShared, + LocusIsPhased: locusIsPhased_OnlyIncludeShared, Base1Value: locusBase1, Base2Value: locusBase2, } diff --git a/internal/helpers/helpers.go b/internal/helpers/helpers.go index 351d4a0..0754a10 100644 --- a/internal/helpers/helpers.go +++ b/internal/helpers/helpers.go @@ -246,6 +246,13 @@ func CeilFloat64ToInt(input float64)(int, error){ return ceiledInt, nil } +func ConvertFloat32ToString(input float32) string{ + + result := strconv.FormatFloat(float64(input), 'f', 5, 32) + + return result +} + func ConvertFloat64ToString(input float64) string{ result := strconv.FormatFloat(input, 'f', 5, 64) diff --git a/resources/geneticReferences/traits/eyeColor.go b/resources/geneticReferences/traits/eyeColor.go index 46cd713..402689c 100644 --- a/resources/geneticReferences/traits/eyeColor.go +++ b/resources/geneticReferences/traits/eyeColor.go @@ -125,7 +125,7 @@ func getEyeColorTraitObject()Trait{ TraitDescription: "The color of a person's eyes.", LociList: eyeColorLociList, RulesList: []TraitRule{}, - OutcomesList: []string{}, + OutcomesList: []string{"Blue", "Green", "Hazel", "Brown"}, References: referencesMap, } diff --git a/utilities/createGeneticModels/createGeneticModels.go b/utilities/createGeneticModels/createGeneticModels.go index 740118d..23c2506 100644 --- a/utilities/createGeneticModels/createGeneticModels.go +++ b/utilities/createGeneticModels/createGeneticModels.go @@ -41,8 +41,11 @@ import mathRand "math/rand/v2" import goFilepath "path/filepath" import "time" + func main(){ + traits.InitializeTraitVariables() + app := app.New() customTheme := getCustomFyneTheme() @@ -458,8 +461,6 @@ func setCreateTrainingDataPage(window fyne.Window, previousPage func()){ func setStartAndMonitorCreateTrainingDataPage(window fyne.Window, previousPage func()){ - traits.InitializeTraitVariables() - err := locusMetadata.InitializeLocusMetadataVariables() if (err != nil){ setErrorEncounteredPage(window, err, previousPage) @@ -710,22 +711,30 @@ func setStartAndMonitorCreateTrainingDataPage(window fyne.Window, previousPage f } } - // We create folder to store the training data + // We create the folder to store the training data _, err = localFilesystem.CreateFolder("./TrainingData") if (err != nil) { return false, false, err } //TODO: Add more traits - traitNamesList := []string{"Eye Color"} + traitNamesList := []string{"Eye Color", "Lactose Tolerance"} // We create the folders for each trait's training data for _, traitName := range traitNamesList{ - folderpath := goFilepath.Join("./TrainingData/", traitName) + traitNameWithoutWhitespace := strings.ReplaceAll(traitName, " ", "") + + folderpath := goFilepath.Join("./TrainingData/", traitNameWithoutWhitespace) _, err = localFilesystem.CreateFolder(folderpath) if (err != nil) { return false, false, err } + + folderExists, err := localFilesystem.DeleteAllFolderContents(folderpath) + if (err != nil) { return false, false, err } + if (folderExists == false){ + return false, false, errors.New("CreateFolder failed to create folder.") + } } numberOfUserPhenotypeDataObjects := len(userPhenotypesList_OpenSNP) @@ -953,7 +962,6 @@ func setCreateTrainingDataIsCompletePage(window fyne.Window){ } - func setTrainModelsPage(window fyne.Window, previousPage func()){ currentPage := func(){setTrainModelsPage(window, previousPage)} @@ 
-962,24 +970,40 @@ func setTrainModelsPage(window fyne.Window, previousPage func()){ backButton := getBackButtonCentered(previousPage) - description1 := getLabelCentered("Press the button below to begin training the genetic models.") - description2 := getLabelCentered("This will train each neural network using the user training data.") + description1 := getLabelCentered("Press the button below to begin training a genetic model.") + description2 := getLabelCentered("This will train a neural network using the user training data.") description3 := getLabelCentered("This will take a while.") + description4 := getLabelCentered("You must select a trait model to train.") - beginTrainingButton := getWidgetCentered(widget.NewButtonWithIcon("Begin Training Models", theme.MediaPlayIcon(), func(){ - setStartAndMonitorTrainModelsPage(window, currentPage) + traitNamesList := []string{"Eye Color", "Lactose Tolerance"} + + traitNameSelector := widget.NewSelect(traitNamesList, nil) + + beginTrainingButton := getWidgetCentered(widget.NewButtonWithIcon("Begin Training Model", theme.MediaPlayIcon(), func(){ + + selectedTraitIndex := traitNameSelector.SelectedIndex() + if (selectedTraitIndex < 0){ + title := "No Trait Selected" + dialogMessage1 := getLabelCentered("You must select a trait model to train.") + dialogContent := container.NewVBox(dialogMessage1) + dialog.ShowCustom(title, "Close", dialogContent, window) + return + } + + traitName := traitNameSelector.Selected + setStartAndMonitorTrainModelPage(window, traitName, currentPage) })) - page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2, description3, beginTrainingButton) + traitNameSelectorCentered := getWidgetCentered(traitNameSelector) + + page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2, description3, description4, widget.NewSeparator(), traitNameSelectorCentered, widget.NewSeparator(), beginTrainingButton) window.SetContent(page) } +func setStartAndMonitorTrainModelPage(window fyne.Window, traitName string, previousPage func()){ - -func setStartAndMonitorTrainModelsPage(window fyne.Window, previousPage func()){ - - title := getBoldLabelCentered("Train Models") + title := getBoldLabelCentered("Train Model") //TODO: Verify TrainingData folder integrity @@ -1009,15 +1033,15 @@ func setStartAndMonitorTrainModelsPage(window fyne.Window, previousPage func()){ estimatedTimeRemainingLabelCentered := getWidgetCentered(estimatedTimeRemainingLabel) - // We set this bool to true to stop the trainModels process - var trainModelsIsStoppedBoolMutex sync.RWMutex - trainModelsIsStoppedBool := false + // We set this bool to true to stop the trainModel process + var trainModelIsStoppedBoolMutex sync.RWMutex + trainModelIsStoppedBool := false cancelButton := getWidgetCentered(widget.NewButtonWithIcon("Cancel", theme.CancelIcon(), func(){ - trainModelsIsStoppedBoolMutex.Lock() - trainModelsIsStoppedBool = true - trainModelsIsStoppedBoolMutex.Unlock() + trainModelIsStoppedBoolMutex.Lock() + trainModelIsStoppedBool = true + trainModelIsStoppedBoolMutex.Unlock() previousPage() })) @@ -1026,7 +1050,7 @@ func setStartAndMonitorTrainModelsPage(window fyne.Window, previousPage func()){ window.SetContent(page) - trainModelsFunction := func(){ + trainModelFunction := func(){ var processProgressMutex sync.RWMutex // This stores the amount of progress which has been completed (0-1) @@ -1042,11 +1066,11 @@ func setStartAndMonitorTrainModelsPage(window fyne.Window, previousPage func()){ for{ - 
trainModelsIsStoppedBoolMutex.RLock() - trainModelsIsStopped := trainModelsIsStoppedBool - trainModelsIsStoppedBoolMutex.RUnlock() + trainModelIsStoppedBoolMutex.RLock() + trainModelIsStopped := trainModelIsStoppedBool + trainModelIsStoppedBoolMutex.RUnlock() - if (trainModelsIsStopped == true){ + if (trainModelIsStopped == true){ // User exited the process/Process has completed return nil } @@ -1093,9 +1117,9 @@ func setStartAndMonitorTrainModelsPage(window fyne.Window, previousPage func()){ err := updateTimeRemainingDisplayFunction() if (err != nil){ - trainModelsIsStoppedBoolMutex.Lock() - trainModelsIsStoppedBool = true - trainModelsIsStoppedBoolMutex.Unlock() + trainModelIsStoppedBoolMutex.Lock() + trainModelIsStoppedBool = true + trainModelIsStoppedBoolMutex.Unlock() setErrorEncounteredPage(window, err, previousPage) return @@ -1107,89 +1131,107 @@ func setStartAndMonitorTrainModelsPage(window fyne.Window, previousPage func()){ //Outputs: // -bool: Process completed (true == was not stopped mid-way) // -error - trainModels := func()(bool, error){ + trainModel := func()(bool, error){ _, err := localFilesystem.CreateFolder("./TrainedModels") if (err != nil) { return false, err } - traitNamesList := []string{"Eye Color"} - for _, traitName := range traitNamesList{ + trainingSetFilepathsList, _, err := getTrainingAndTestingDataFilepathLists(traitName) + if (err != nil) { return false, err } - trainingSetFilepathsList, _, err := getTrainingAndTestingDataFilepathLists(traitName) - if (err != nil) { return false, err } + // We create a new neural network object to train + neuralNetworkObject, err := geneticPrediction.GetNewUntrainedNeuralNetworkObject(traitName) + if (err != nil) { return false, err } - // We create a new neural network object to train - neuralNetworkObject, err := geneticPrediction.GetNewUntrainedNeuralNetworkObject(traitName) - if (err != nil) { return false, err } + numberOfTrainingDatas := len(trainingSetFilepathsList) + numberOfTrainingDatasString := helpers.ConvertIntToString(numberOfTrainingDatas) - numberOfTrainingDatas := len(trainingSetFilepathsList) - numberOfTrainingDatasString := helpers.ConvertIntToString(numberOfTrainingDatas) + // This keeps track of how far along we are in training + trainingDataIndex := 0 - finalIndex := numberOfTrainingDatas - 1 + // Outputs: + // -bool: User stopped training + // -bool: Another training data exists + // -geneticPrediction.TrainingData + // -error + getNextTrainingDataFunction := func()(bool, bool, geneticPrediction.TrainingData, error){ - for index, filePath := range trainingSetFilepathsList{ + trainModelIsStoppedBoolMutex.RLock() + trainModelIsStopped := trainModelIsStoppedBool + trainModelIsStoppedBoolMutex.RUnlock() - trainModelsIsStoppedBoolMutex.RLock() - trainModelsIsStopped := trainModelsIsStoppedBool - trainModelsIsStoppedBoolMutex.RUnlock() - - if (trainModelsIsStopped == true){ - // User exited the process - return false, nil - } - - fileExists, fileContents, err := localFilesystem.GetFileContents(filePath) - if (err != nil) { return false, err } - if (fileExists == false){ - return false, errors.New("TrainingData file not found: " + filePath) - } - - trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents) - if (err != nil) { return false, err } - - err = geneticPrediction.TrainNeuralNetwork(traitName, neuralNetworkObject, trainingDataObject) - if (err != nil) { return false, err } - - exampleIndexString := helpers.ConvertIntToString(index+1) - numberOfExamplesProgress := 
"Trained " + exampleIndexString + "/" + numberOfTrainingDatasString + " Examples" - - progressDetailsBinding.Set(numberOfExamplesProgress) - - newProgressFloat64 := float64(index)/float64(finalIndex) - - progressPercentageBinding.Set(newProgressFloat64) - - processProgressMutex.Lock() - processProgress = newProgressFloat64 - processProgressMutex.Unlock() + if (trainModelIsStopped == true){ + // User exited the process + return true, false, geneticPrediction.TrainingData{}, nil } - // Network training is complete. - // We now save the neural network as a .gob file + if (trainingDataIndex == numberOfTrainingDatas){ + // We are done training. + return false, false, geneticPrediction.TrainingData{}, nil + } - neuralNetworkBytes, err := geneticPrediction.EncodeNeuralNetworkObjectToBytes(*neuralNetworkObject) - if (err != nil) { return false, err } + trainingDataFilepath := trainingSetFilepathsList[trainingDataIndex] - traitNameWithoutWhitespaces := strings.ReplaceAll(traitName, " ", "") + fileExists, fileContents, err := localFilesystem.GetFileContents(trainingDataFilepath) + if (err != nil) { return false, false, geneticPrediction.TrainingData{}, err } + if (fileExists == false){ + return false, false, geneticPrediction.TrainingData{}, errors.New("TrainingData file not found: " + trainingDataFilepath) + } - neuralNetworkFilename := traitNameWithoutWhitespaces + "Model.gob" + trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents) + if (err != nil) { return false, false, geneticPrediction.TrainingData{}, err } - err = localFilesystem.CreateOrOverwriteFile(neuralNetworkBytes, "./TrainedModels/", neuralNetworkFilename) - if (err != nil) { return false, err } + + trainingDataIndex += 1 + + numberOfExamplesTrainedString := helpers.ConvertIntToString(trainingDataIndex + 1) + numberOfExamplesProgress := "Trained " + numberOfExamplesTrainedString + "/" + numberOfTrainingDatasString + " Examples" + + progressDetailsBinding.Set(numberOfExamplesProgress) + + newProgressFloat64 := float64(trainingDataIndex)/float64(numberOfTrainingDatas) + + err = progressPercentageBinding.Set(newProgressFloat64) + if (err != nil) { return false, false, geneticPrediction.TrainingData{}, err } + + processProgressMutex.Lock() + processProgress = newProgressFloat64 + processProgressMutex.Unlock() + + return false, true, trainingDataObject, nil } + processCompleted, err := geneticPrediction.TrainNeuralNetwork(traitName, neuralNetworkObject, getNextTrainingDataFunction) + if (err != nil) { return false, err } + if (processCompleted == false){ + return false, nil + } + + // Network training is complete. 
+ // We now save the neural network as a .gob file + + neuralNetworkBytes, err := geneticPrediction.EncodeNeuralNetworkObjectToBytes(*neuralNetworkObject) + if (err != nil) { return false, err } + + traitNameWithoutWhitespaces := strings.ReplaceAll(traitName, " ", "") + + neuralNetworkFilename := traitNameWithoutWhitespaces + "Model.gob" + + err = localFilesystem.CreateOrOverwriteFile(neuralNetworkBytes, "./TrainedModels/", neuralNetworkFilename) + if (err != nil) { return false, err } + progressPercentageBinding.Set(1) return true, nil } - processIsComplete, err := trainModels() + processIsComplete, err := trainModel() if (err != nil){ - trainModelsIsStoppedBoolMutex.Lock() - trainModelsIsStoppedBool = true - trainModelsIsStoppedBoolMutex.Unlock() + trainModelIsStoppedBoolMutex.Lock() + trainModelIsStoppedBool = true + trainModelIsStoppedBoolMutex.Unlock() setErrorEncounteredPage(window, err, previousPage) return @@ -1199,19 +1241,19 @@ func setStartAndMonitorTrainModelsPage(window fyne.Window, previousPage func()){ return } - setTrainModelsIsCompletePage(window) + setTrainModelIsCompletePage(window) } - go trainModelsFunction() + go trainModelFunction() } -func setTrainModelsIsCompletePage(window fyne.Window){ +func setTrainModelIsCompletePage(window fyne.Window){ - title := getBoldLabelCentered("Training Models Is Complete") + title := getBoldLabelCentered("Training Model Is Complete") description1 := getLabelCentered("Model training is complete!") - description2 := getLabelCentered("The models have been saved in the TrainedModels folder.") + description2 := getLabelCentered("The model has been saved in the TrainedModels folder.") exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), func(){ setHomePage(window) @@ -1231,24 +1273,81 @@ func setTestModelsPage(window fyne.Window, previousPage func()){ backButton := getBackButtonCentered(previousPage) - description1 := getLabelCentered("Press the button below to begin testing the genetic models.") + description1 := getLabelCentered("Press the button below to begin testing a genetic model.") description2 := getLabelCentered("This will test each neural network using user training data examples.") description3 := getLabelCentered("The testing data is not used to train the models.") description4 := getLabelCentered("The results of the testing will be displayed at the end.") + description5 := getLabelCentered("You must select a trait model to test.") - beginTestingButton := getWidgetCentered(widget.NewButtonWithIcon("Begin Testing Models", theme.MediaPlayIcon(), func(){ - setStartAndMonitorTestModelsPage(window, currentPage) + traitNamesList := []string{"Eye Color", "Lactose Tolerance"} + + traitNameSelector := widget.NewSelect(traitNamesList, nil) + + beginTestingButton := getWidgetCentered(widget.NewButtonWithIcon("Begin Testing Model", theme.MediaPlayIcon(), func(){ + + selectedTraitIndex := traitNameSelector.SelectedIndex() + if (selectedTraitIndex < 0){ + title := "No Trait Selected" + dialogMessage1 := getLabelCentered("You must select a trait model to test.") + dialogContent := container.NewVBox(dialogMessage1) + dialog.ShowCustom(title, "Close", dialogContent, window) + return + } + + traitName := traitNameSelector.Selected + + setStartAndMonitorTestModelPage(window, traitName, currentPage) })) - page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2, description3, description4, beginTestingButton) + traitNameSelectorCentered := getWidgetCentered(traitNameSelector) + 
+ page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2, description3, description4, description5, widget.NewSeparator(), traitNameSelectorCentered, widget.NewSeparator(), beginTestingButton) window.SetContent(page) } -func setStartAndMonitorTestModelsPage(window fyne.Window, previousPage func()){ +type TraitOutcomeInfo struct{ - title := getBoldLabelCentered("Test Models") + // This is the outcome which was found + // Example: "Blue" + OutcomeName string + + // This is a value between 0-100 which describes the percentage of the loci which were tested for the input for the prediction + PercentageOfLociTested int + + // This is a value between 0-100 which describes the percentage of the tested loci which were phased for the input for the prediction + PercentageOfPhasedLoci int +} + +type TraitPredictionAccuracyInfo struct{ + + // This contains the quantity of examples for the outcome with the specified percentageOfLociTested and percentageOfPhasedLoci + QuantityOfExamples int + + // This contains the quantity of predictions for the outcome with the specified percentageOfLociTested and percentageOfPhasedLoci + // Prediction = our model predicted this outcome + QuantityOfPredictions int + + // This stores the probability (0-100) that our model will accurately predict this outcome for a genome which has + // the specified percentageOfLociTested and percentageOfPhasedLoci + // In other words: What is the probability that if you give Seekia a blue-eyed genome, it will give you a correct Blue prediction? + // This value is only accurate is QuantityOfExamples > 0 + ProbabilityOfCorrectGenomePrediction int + + // This stores the probability (0-100) that our model is correct if our model predicts that a genome + // with the specified percentageOfLociTested and percentageOfPhasedLoci has this outcome + // In other words: What is the probability that if Seekia says a genome will have blue eyes, it is correct? 
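+	// (In machine learning terms, this is the model's precision for the outcome, whereas ProbabilityOfCorrectGenomePrediction is its recall.)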
+ // This value is only accurate is QuantityOfPredictions > 0 + ProbabilityOfCorrectOutcomePrediction int +} + +// Map Structure: Trait Outcome Info -> Trait Prediction Accuracy Info +type TraitAccuracyInfoMap map[TraitOutcomeInfo]TraitPredictionAccuracyInfo + +func setStartAndMonitorTestModelPage(window fyne.Window, traitName string, previousPage func()){ + + title := getBoldLabelCentered("Testing Model") progressDetailsBinding := binding.NewString() progressPercentageBinding := binding.NewFloat() @@ -1266,15 +1365,15 @@ func setStartAndMonitorTestModelsPage(window fyne.Window, previousPage func()){ progressDetailsLabelCentered := getWidgetCentered(progressDetailsLabel) - // We set this bool to true to stop the testModels process - var testModelsIsStoppedBoolMutex sync.RWMutex - testModelsIsStoppedBool := false + // We set this bool to true to stop the testModel process + var testModelIsStoppedBoolMutex sync.RWMutex + testModelIsStoppedBool := false cancelButton := getWidgetCentered(widget.NewButtonWithIcon("Cancel", theme.CancelIcon(), func(){ - testModelsIsStoppedBoolMutex.Lock() - testModelsIsStoppedBool = true - testModelsIsStoppedBoolMutex.Unlock() + testModelIsStoppedBoolMutex.Lock() + testModelIsStoppedBool = true + testModelIsStoppedBoolMutex.Unlock() previousPage() })) @@ -1283,134 +1382,239 @@ func setStartAndMonitorTestModelsPage(window fyne.Window, previousPage func()){ window.SetContent(page) - testModelsFunction := func(){ - - // This map stores the accuracy for each model - // Map Structure: Trait Name -> Accuracy (A value between 0 and 1, 1 is fully accurate, 0 is fully inaccurate) - traitAverageAccuracyMap := make(map[string]float32) + testModelFunction := func(){ //Outputs: // -bool: Process completed (true == was not stopped mid-way) + // -TraitAccuracyInfoMap // -error - testModels := func()(bool, error){ + testModel := func()(bool, TraitAccuracyInfoMap, error){ - traitNamesList := []string{"Eye Color"} + type TraitAccuracyStatisticsValue struct{ - for _, traitName := range traitNamesList{ + // This stores the quantity of examples of this outcome + QuantityOfExamples int - _, testingSetFilepathsList, err := getTrainingAndTestingDataFilepathLists(traitName) - if (err != nil) { return false, err } + // This stores the quantity of predictions that were made for this outcome + // In other words: The quantity of instances where our model predicted this outcome + QuantityOfPredictions int - traitNameWithoutWhitespaces := strings.ReplaceAll(traitName, " ", "") + // This stores the quantity of predictions that were correct when the genome had this outcome + QuantityOfCorrectGenomePredictions int - // We read the trained model for this trait - modelFilename := traitNameWithoutWhitespaces + "Model.gob" + // This stores the quantity of predictions that were correct when the model predicted this outcome + QuantityOfCorrectOutcomePredictions int + } - trainedModelFilepath := goFilepath.Join("./TrainedModels/", modelFilename) + // We use this map to count up the information about predictions + // We use information from this map to construct the final accuracy information map + traitPredictionInfoMap := make(map[TraitOutcomeInfo]TraitAccuracyStatisticsValue) - fileExists, fileContents, err := localFilesystem.GetFileContents(trainedModelFilepath) - if (err != nil) { return false, err } + + _, testingSetFilepathsList, err := getTrainingAndTestingDataFilepathLists(traitName) + if (err != nil) { return false, nil, err } + + traitNameWithoutWhitespaces := strings.ReplaceAll(traitName, " 
", "") + + // We read the trained model for this trait + modelFilename := traitNameWithoutWhitespaces + "Model.gob" + + trainedModelFilepath := goFilepath.Join("./TrainedModels/", modelFilename) + + fileExists, fileContents, err := localFilesystem.GetFileContents(trainedModelFilepath) + if (err != nil) { return false, nil, err } + if (fileExists == false){ + return false, nil, errors.New("TrainedModel not found: " + trainedModelFilepath) + } + + neuralNetworkObject, err := geneticPrediction.DecodeBytesToNeuralNetworkObject(fileContents) + if (err != nil) { return false, nil, err } + + numberOfTrainingDatas := len(testingSetFilepathsList) + numberOfTrainingDatasString := helpers.ConvertIntToString(numberOfTrainingDatas) + + finalIndex := numberOfTrainingDatas - 1 + + for index, filePath := range testingSetFilepathsList{ + + testModelIsStoppedBoolMutex.RLock() + testModelIsStopped := testModelIsStoppedBool + testModelIsStoppedBoolMutex.RUnlock() + + if (testModelIsStopped == true){ + // User exited the process + return false, nil, nil + } + + fileExists, fileContents, err := localFilesystem.GetFileContents(filePath) + if (err != nil) { return false, nil, err } if (fileExists == false){ - return false, errors.New("TrainedModel not found: " + trainedModelFilepath) + return false, nil, errors.New("TrainingData file not found: " + filePath) } - neuralNetworkObject, err := geneticPrediction.DecodeBytesToNeuralNetworkObject(fileContents) - if (err != nil) { return false, err } + trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents) + if (err != nil) { return false, nil, err } - numberOfTrainingDatas := len(testingSetFilepathsList) - numberOfTrainingDatasString := helpers.ConvertIntToString(numberOfTrainingDatas) + trainingDataInputLayer := trainingDataObject.InputLayer + trainingDataExpectedOutputLayer := trainingDataObject.OutputLayer - finalIndex := numberOfTrainingDatas - 1 + predictionLayer, err := geneticPrediction.GetNeuralNetworkRawPrediction(&neuralNetworkObject, trainingDataInputLayer) + if (err != nil) { return false, nil, err } - // This is the sum of accuracy for each training data - accuracySum := float32(0) + numberOfPredictionNeurons := len(predictionLayer) - for index, filePath := range testingSetFilepathsList{ + if (len(trainingDataExpectedOutputLayer) != numberOfPredictionNeurons){ + return false, nil, errors.New("Neural network prediction output length does not match expected output length.") + } - testModelsIsStoppedBoolMutex.RLock() - testModelsIsStopped := testModelsIsStoppedBool - testModelsIsStoppedBoolMutex.RUnlock() + correctOutcomeName, err := geneticPrediction.GetOutcomeNameFromOutputLayer(traitName, true, trainingDataExpectedOutputLayer) + if (err != nil) { return false, nil, err } - if (testModelsIsStopped == true){ - // User exited the process - return false, nil + predictedOutcomeName, err := geneticPrediction.GetOutcomeNameFromOutputLayer(traitName, true, predictionLayer) + if (err != nil) { return false, nil, err } + + getPredictionIsCorrectBool := func()bool{ + if (predictedOutcomeName == correctOutcomeName){ + return true + } + return false + } + + predictionIsCorrect := getPredictionIsCorrectBool() + + numberOfKnownLoci, numberOfKnownAndPhasedLoci, numberOfLoci, err := geneticPrediction.GetLociInfoFromInputLayer(trainingDataInputLayer) + if (err != nil) { return false, nil, err } + + proportionOfLociTested := float64(numberOfKnownLoci)/float64(numberOfLoci) + percentageOfLociTested := int(100*proportionOfLociTested) + + 
proportionOfPhasedLoci := float64(numberOfKnownAndPhasedLoci)/float64(numberOfKnownLoci) + percentageOfPhasedLoci := int(100*proportionOfPhasedLoci) + + { + // We first add the information to the map for the correct outcome + + newTraitOutcomeInfo_CorrectOutcome := TraitOutcomeInfo{ + + OutcomeName: correctOutcomeName, + PercentageOfLociTested: percentageOfLociTested, + PercentageOfPhasedLoci: percentageOfPhasedLoci, } - fileExists, fileContents, err := localFilesystem.GetFileContents(filePath) - if (err != nil) { return false, err } - if (fileExists == false){ - return false, errors.New("TrainingData file not found: " + filePath) - } + getTraitAccuracyStatisticsValue_CorrectOutcome := func()TraitAccuracyStatisticsValue{ - trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents) - if (err != nil) { return false, err } - - trainingDataInputLayer := trainingDataObject.InputLayer - trainingDataExpectedOutputLayer := trainingDataObject.OutputLayer - - predictionLayer, err := geneticPrediction.GetNeuralNetworkRawPrediction(&neuralNetworkObject, trainingDataInputLayer) - if (err != nil) { return false, err } - - numberOfPredictionNeurons := len(predictionLayer) - - if (len(trainingDataExpectedOutputLayer) != numberOfPredictionNeurons){ - return false, errors.New("Neural network prediction output length does not match expected output length.") - } - - // TODO: Improve how we calculate the accuracy - // We should take into account the number of loci that were provided by the user's genome, - // and display an accuracy for each number of loci provided. - // For example, if 90% of loci values were provided, accuracy is 80%. If only 10% were provided, accuracy is 20%. - - // This is the sum of the distance between the expected values and the predicted values - totalDistance := float32(0) - - for index, element := range predictionLayer{ - - // Each element is a neuron value between 0 and 1 - // We see how far away the answer is from the expected value - - expectedValue := trainingDataExpectedOutputLayer[index] - - distance := element - expectedValue - - // We make distance positive - if (distance < 0){ - distance = -distance + existingTraitAccuracyStatisticsValue, exists := traitPredictionInfoMap[newTraitOutcomeInfo_CorrectOutcome] + if (exists == false){ + newTraitAccuracyStatisticsValue := TraitAccuracyStatisticsValue{} + return newTraitAccuracyStatisticsValue } - - totalDistance += distance + return existingTraitAccuracyStatisticsValue } - averageDistance := totalDistance/float32(numberOfPredictionNeurons) + traitAccuracyStatisticsValue := getTraitAccuracyStatisticsValue_CorrectOutcome() - accuracy := 1 - averageDistance + traitAccuracyStatisticsValue.QuantityOfExamples += 1 - accuracySum += accuracy + if (predictionIsCorrect == true){ + traitAccuracyStatisticsValue.QuantityOfCorrectGenomePredictions += 1 + } - exampleIndexString := helpers.ConvertIntToString(index+1) - numberOfExamplesProgress := "Tested " + exampleIndexString + "/" + numberOfTrainingDatasString + " Examples" - - progressDetailsBinding.Set(numberOfExamplesProgress) - - newProgressFloat64 := float64(index)/float64(finalIndex) - - progressPercentageBinding.Set(newProgressFloat64) + traitPredictionInfoMap[newTraitOutcomeInfo_CorrectOutcome] = traitAccuracyStatisticsValue } - averageAccuracy := accuracySum/float32(numberOfTrainingDatas) + { + // We now add the information to the map for the predicted outcome - traitAverageAccuracyMap[traitName] = averageAccuracy + newTraitOutcomeInfo_PredictedOutcome 
:= TraitOutcomeInfo{ + + OutcomeName: predictedOutcomeName, + PercentageOfLociTested: percentageOfLociTested, + PercentageOfPhasedLoci: percentageOfPhasedLoci, + } + + getTraitAccuracyStatisticsValue_PredictedOutcome := func()TraitAccuracyStatisticsValue{ + + existingTraitAccuracyStatisticsValue, exists := traitPredictionInfoMap[newTraitOutcomeInfo_PredictedOutcome] + if (exists == false){ + newTraitAccuracyStatisticsValue := TraitAccuracyStatisticsValue{} + return newTraitAccuracyStatisticsValue + } + return existingTraitAccuracyStatisticsValue + } + + traitAccuracyStatisticsValue := getTraitAccuracyStatisticsValue_PredictedOutcome() + + traitAccuracyStatisticsValue.QuantityOfPredictions += 1 + + if (predictionIsCorrect == true){ + traitAccuracyStatisticsValue.QuantityOfCorrectOutcomePredictions += 1 + } + + traitPredictionInfoMap[newTraitOutcomeInfo_PredictedOutcome] = traitAccuracyStatisticsValue + } + + exampleIndexString := helpers.ConvertIntToString(index+1) + numberOfExamplesProgress := "Tested " + exampleIndexString + "/" + numberOfTrainingDatasString + " Examples" + + progressDetailsBinding.Set(numberOfExamplesProgress) + + newProgressFloat64 := float64(index)/float64(finalIndex) + + progressPercentageBinding.Set(newProgressFloat64) + } + + // Now we construct the TraitAccuracyInfoMap + + // This map stores the accuracy for each outcome + traitAccuracyInfoMap := make(map[TraitOutcomeInfo]TraitPredictionAccuracyInfo) + + for traitAccuracyData, value := range traitPredictionInfoMap{ + + quantityOfExamples := value.QuantityOfExamples + quantityOfPredictions := value.QuantityOfPredictions + + quantityOfCorrectGenomePredictions := value.QuantityOfCorrectGenomePredictions + quantityOfCorrectOutcomePredictions := value.QuantityOfCorrectOutcomePredictions + + if (quantityOfCorrectGenomePredictions > quantityOfExamples){ + return false, nil, errors.New("traitPredictionInfoMap contains quantityOfCorrectGenomePredictions > quantityOfExamples") + } + if (quantityOfCorrectOutcomePredictions > quantityOfPredictions){ + return false, nil, errors.New("traitPredictionInfoMap contains quantityOfCorrectOutcomePredictions > quantityOfPredictions") + } + + newTraitPredictionAccuracyInfo := TraitPredictionAccuracyInfo{ + QuantityOfExamples: quantityOfExamples, + QuantityOfPredictions: quantityOfPredictions, + } + + if (quantityOfExamples > 0){ + + proportionOfCorrectGenomePredictions := float64(quantityOfCorrectGenomePredictions)/float64(quantityOfExamples) + percentageOfCorrectGenomePredictions := int(100*proportionOfCorrectGenomePredictions) + + newTraitPredictionAccuracyInfo.ProbabilityOfCorrectGenomePrediction = percentageOfCorrectGenomePredictions + } + + if (quantityOfPredictions > 0){ + + proportionOfCorrectOutcomePredictions := float64(quantityOfCorrectOutcomePredictions)/float64(quantityOfPredictions) + percentageOfCorrectOutcomePredictions := int(100*proportionOfCorrectOutcomePredictions) + + newTraitPredictionAccuracyInfo.ProbabilityOfCorrectOutcomePrediction = percentageOfCorrectOutcomePredictions + } + + traitAccuracyInfoMap[traitAccuracyData] = newTraitPredictionAccuracyInfo } // Testing is complete. 
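+			// traitAccuracyInfoMap now maps each observed combination of outcome name, percentage of loci tested,
+			// and percentage of phased loci to the quantity of examples and predictions seen and how often the model was correct.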
progressPercentageBinding.Set(1) - return true, nil + return true, traitAccuracyInfoMap, nil } - processIsComplete, err := testModels() + processIsComplete, traitAccuracyInfoMap, err := testModel() if (err != nil){ setErrorEncounteredPage(window, err, previousPage) return @@ -1420,10 +1624,173 @@ func setStartAndMonitorTestModelsPage(window fyne.Window, previousPage func()){ return } - setTestModelsIsCompletePage(window, traitAverageAccuracyMap) + setViewModelTestingTraitResultsPage(window, traitName, traitAccuracyInfoMap, previousPage) } - go testModelsFunction() + go testModelFunction() +} + +// This is a page to view the details of testing for a specific trait's model +func setViewModelTestingTraitResultsPage(window fyne.Window, traitName string, traitAccuracyInfoMap TraitAccuracyInfoMap, exitPage func()){ + + title := getBoldLabelCentered("Trait Prediction Accuracy Details") + + exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), exitPage)) + + description1 := getLabelCentered("The results of the prediction accuracy for this trait are below.") + + traitNameTitle := widget.NewLabel("Trait Name:") + traitNameLabel := getBoldLabel(traitName) + traitNameRow := container.NewHBox(layout.NewSpacer(), traitNameTitle, traitNameLabel, layout.NewSpacer()) + + description2 := getLabelCentered("Prediction accuracy values are a pair of Genome Accuracy/Outcome Accuracy.") + description3 := getLabelCentered("Genome Accuracy is the probability that the model will predict a genome's trait value correctly.") + description4 := getLabelCentered("Outcome Accuracy is the probability that a trait prediction that the model makes is correct.") + + getResultsGrid := func()(*fyne.Container, error){ + + outcomeNameTitle := getItalicLabelCentered("Outcome Name") + emptyLabel1 := widget.NewLabel("") + + predictionAccuracyTitle1 := getItalicLabelCentered("Prediction Accuracy") + knownLociLabel_0to33 := getItalicLabelCentered("0-33% Known Loci") + + predictionAccuracyTitle2 := getItalicLabelCentered("Prediction Accuracy") + knownLociLabel_34to66 := getItalicLabelCentered("34-66% Known Loci") + + predictionAccuracyTitle3 := getItalicLabelCentered("Prediction Accuracy") + knownLociLabel_67to100 := getItalicLabelCentered("67-100% Known Loci") + + emptyLabel2 := widget.NewLabel("") + emptyLabel3 := widget.NewLabel("") + + outcomeNameColumn := container.NewVBox(outcomeNameTitle, emptyLabel1, widget.NewSeparator()) + predictionAccuracyColumn_0to33 := container.NewVBox(predictionAccuracyTitle1, knownLociLabel_0to33, widget.NewSeparator()) + predictionAccuracyColumn_34to66 := container.NewVBox(predictionAccuracyTitle2, knownLociLabel_34to66, widget.NewSeparator()) + predictionAccuracyColumn_67to100 := container.NewVBox(predictionAccuracyTitle3, knownLociLabel_67to100, widget.NewSeparator()) + viewTraitAccuracyDetailsColumn := container.NewVBox(emptyLabel2, emptyLabel3, widget.NewSeparator()) + + traitObject, err := traits.GetTraitObject(traitName) + if (err != nil) { return nil, err } + + outcomeNamesList := traitObject.OutcomesList + + for _, outcomeName := range outcomeNamesList{ + + outcomeNameLabel := getBoldLabelCentered(outcomeName) + + // We use the below variables to sum up the accuracy percentages so we can average them + + genomePredictionAccuracySum_0to33 := 0 + genomeExampleCount_0to33 := 0 + + outcomePredictionAccuracySum_0to33 := 0 + outcomePredictionCount_0to33 := 0 + + genomePredictionAccuracySum_34to66 := 0 + genomeExampleCount_34to66 := 0 + + 
outcomePredictionAccuracySum_34to66 := 0 + outcomePredictionCount_34to66 := 0 + + genomePredictionAccuracySum_67to100 := 0 + genomeExampleCount_67to100 := 0 + + outcomePredictionAccuracySum_67to100 := 0 + outcomePredictionCount_67to100 := 0 + + for traitOutcomeInfo, traitPredictionAccuracyInfo := range traitAccuracyInfoMap{ + + currentOutcomeName := traitOutcomeInfo.OutcomeName + if (currentOutcomeName != outcomeName){ + continue + } + + percentageOfLociTested := traitOutcomeInfo.PercentageOfLociTested + + quantityOfExamples := traitPredictionAccuracyInfo.QuantityOfExamples + quantityOfPredictions := traitPredictionAccuracyInfo.QuantityOfPredictions + + genomePredictionAccuracyPercentage := traitPredictionAccuracyInfo.ProbabilityOfCorrectGenomePrediction + outcomePredictionAccuracyPercentage := traitPredictionAccuracyInfo.ProbabilityOfCorrectOutcomePrediction + + if (percentageOfLociTested <= 33){ + + genomePredictionAccuracySum_0to33 += (genomePredictionAccuracyPercentage * quantityOfExamples) + genomeExampleCount_0to33 += quantityOfExamples + + outcomePredictionAccuracySum_0to33 += (outcomePredictionAccuracyPercentage * quantityOfPredictions) + outcomePredictionCount_0to33 += quantityOfPredictions + + } else if (percentageOfLociTested > 33 && percentageOfLociTested <= 66){ + + genomePredictionAccuracySum_34to66 += (genomePredictionAccuracyPercentage * quantityOfExamples) + genomeExampleCount_34to66 += quantityOfExamples + + outcomePredictionAccuracySum_34to66 += (outcomePredictionAccuracyPercentage * quantityOfPredictions) + outcomePredictionCount_34to66 += quantityOfPredictions + + } else { + + genomePredictionAccuracySum_67to100 += (genomePredictionAccuracyPercentage * quantityOfExamples) + genomeExampleCount_67to100 += quantityOfExamples + + outcomePredictionAccuracySum_67to100 += (outcomePredictionAccuracyPercentage * quantityOfPredictions) + outcomePredictionCount_67to100 += quantityOfPredictions + } + } + + getAverageAccuracyText := func(accuracySum int, predictionCount int)string{ + if (predictionCount == 0){ + return "Unknown" + } + + averageAccuracy := accuracySum/predictionCount + + averageAccuracyString := helpers.ConvertIntToString(averageAccuracy) + + result := averageAccuracyString + "%" + + return result + } + + genomeAverageAccuracyText_0to33 := getAverageAccuracyText(genomePredictionAccuracySum_0to33, genomeExampleCount_0to33) + genomeAverageAccuracyText_34to66 := getAverageAccuracyText(genomePredictionAccuracySum_34to66, genomeExampleCount_34to66) + genomeAverageAccuracyText_67to100 := getAverageAccuracyText(genomePredictionAccuracySum_67to100, genomeExampleCount_67to100) + + outcomeAverageAccuracyText_0to33 := getAverageAccuracyText(outcomePredictionAccuracySum_0to33, outcomePredictionCount_0to33) + outcomeAverageAccuracyText_34to66 := getAverageAccuracyText(outcomePredictionAccuracySum_34to66, outcomePredictionCount_34to66) + outcomeAverageAccuracyText_67to100 := getAverageAccuracyText(outcomePredictionAccuracySum_67to100, outcomePredictionCount_67to100) + + averageAccuracyLabel_0to33 := getBoldLabelCentered(genomeAverageAccuracyText_0to33 + "/" + outcomeAverageAccuracyText_0to33) + averageAccuracyLabel_34to66 := getBoldLabelCentered(genomeAverageAccuracyText_34to66 + "/" + outcomeAverageAccuracyText_34to66) + averageAccuracyLabel_67to100 := getBoldLabelCentered(genomeAverageAccuracyText_67to100 + "/" + outcomeAverageAccuracyText_67to100) + + outcomeNameColumn.Add(outcomeNameLabel) + predictionAccuracyColumn_0to33.Add(averageAccuracyLabel_0to33) + 
predictionAccuracyColumn_34to66.Add(averageAccuracyLabel_34to66) + predictionAccuracyColumn_67to100.Add(averageAccuracyLabel_67to100) + + outcomeNameColumn.Add(widget.NewSeparator()) + predictionAccuracyColumn_0to33.Add(widget.NewSeparator()) + predictionAccuracyColumn_34to66.Add(widget.NewSeparator()) + predictionAccuracyColumn_67to100.Add(widget.NewSeparator()) + } + + resultsGrid := container.NewHBox(layout.NewSpacer(), outcomeNameColumn, predictionAccuracyColumn_0to33, predictionAccuracyColumn_34to66, predictionAccuracyColumn_67to100, viewTraitAccuracyDetailsColumn, layout.NewSpacer()) + + return resultsGrid, nil + } + + resultsGrid, err := getResultsGrid() + if (err != nil){ + setErrorEncounteredPage(window, err, func(){setHomePage(window)}) + return + } + + page := container.NewVBox(title, exitButton, widget.NewSeparator(), description1, widget.NewSeparator(), traitNameRow, widget.NewSeparator(), description2, description3, description4, widget.NewSeparator(), resultsGrid) + + window.SetContent(page) } @@ -1434,7 +1801,7 @@ func setStartAndMonitorTestModelsPage(window fyne.Window, previousPage func()){ // -error func getTrainingAndTestingDataFilepathLists(traitName string)([]string, []string, error){ - if (traitName != "Eye Color"){ + if (traitName != "Eye Color" && traitName != "Lactose Tolerance"){ return nil, nil, errors.New("getTrainingAndTestingDataFilepathLists called with invalid traitName: " + traitName) } @@ -1445,8 +1812,8 @@ func getTrainingAndTestingDataFilepathLists(traitName string)([]string, []string filesList, err := os.ReadDir(trainingDataFolderpath) if (err != nil) { return nil, nil, err } - // This stores the filepath for each training data - trainingDataFilepathsList := make([]string, 0, len(filesList)) + // This map stores the file name for each training data + trainingDataFilenamesMap := make(map[string]struct{}) for _, filesystemObject := range filesList{ @@ -1458,27 +1825,24 @@ func getTrainingAndTestingDataFilepathLists(traitName string)([]string, []string fileName := filesystemObject.Name() - filepath := goFilepath.Join(trainingDataFolderpath, fileName) - - trainingDataFilepathsList = append(trainingDataFilepathsList, filepath) + trainingDataFilenamesMap[fileName] = struct{}{} } - numberOfTrainingDataFiles := len(trainingDataFilepathsList) + numberOfTrainingDataFiles := len(trainingDataFilenamesMap) if (numberOfTrainingDataFiles == 0){ return nil, nil, errors.New("No training data exists for trait: " + traitName) } - if ((numberOfTrainingDataFiles % 110) != 0){ - // There are 110 examples for each user. 
- return nil, nil, errors.New(traitName + " training data has an invalid number of examples.") - } - getNumberOfExpectedTrainingDatas := func()(int, error){ if (traitName == "Eye Color"){ - return 113190, nil + return 112953, nil + + } else if (traitName == "Lactose Tolerance"){ + + return 24808, nil } return 0, errors.New("Unknown traitName: " + traitName) @@ -1508,11 +1872,7 @@ func getTrainingAndTestingDataFilepathLists(traitName string)([]string, []string userIdentifiersMap := make(map[int]struct{}) - for _, trainingDataFilepath := range trainingDataFilepathsList{ - - // We have to extract the filename from the filepath - - trainingDataFilename := goFilepath.Base(trainingDataFilepath) + for trainingDataFilename, _ := range trainingDataFilenamesMap{ // Example filepath format: "User4680_TrainingData_89.gob" @@ -1570,6 +1930,13 @@ func getTrainingAndTestingDataFilepathLists(traitName string)([]string, []string trainingDataFilename := trainingDataFilenamePrefix + kString + ".gob" + _, fileExists := trainingDataFilenamesMap[trainingDataFilename] + if (fileExists == false){ + // Some training datas don't exist due to how training datas are randomly created + // Sometimes, no alleles exist, so we skip creating the training data + continue + } + trainingDataFilepath := goFilepath.Join(trainingDataFolderpath, trainingDataFilename) if (index < numberOfTrainingUsers){ @@ -1583,67 +1950,6 @@ func getTrainingAndTestingDataFilepathLists(traitName string)([]string, []string return trainingSetFilepathsList, testingSetFilepathsList, nil } -// -func setTestModelsIsCompletePage(window fyne.Window, traitPredictionAccuracyMap map[string]float32){ - - title := getBoldLabelCentered("Testing Models Is Complete") - - description1 := getLabelCentered("Model testing is complete!") - description2 := getLabelCentered("The results of the testing are below.") - - getResultsGrid := func()(*fyne.Container, error){ - - traitNameTitle := getItalicLabelCentered("Trait Name") - - predictionAccuracyTitle := getItalicLabelCentered("Prediction Accuracy") - - traitNameColumn := container.NewVBox(traitNameTitle, widget.NewSeparator()) - predictionAccuracyColumn := container.NewVBox(predictionAccuracyTitle, widget.NewSeparator()) - - traitNamesList := helpers.GetListOfMapKeys(traitPredictionAccuracyMap) - - for _, traitName := range traitNamesList{ - - traitNameLabel := getBoldLabelCentered(traitName) - - traitPredictionAccuracy, exists := traitPredictionAccuracyMap[traitName] - if (exists == false){ - return nil, errors.New("traitPredictionAccuracyMap missing traitName: " + traitName) - } - - traitPredictionAccuracyString := helpers.ConvertFloat64ToStringRounded(float64(traitPredictionAccuracy)*100, 2) - - traitPredictionAccuracyFormatted := traitPredictionAccuracyString + "%" - - traitPredictionAccuracyLabel := getBoldLabelCentered(traitPredictionAccuracyFormatted) - - traitNameColumn.Add(traitNameLabel) - predictionAccuracyColumn.Add(traitPredictionAccuracyLabel) - - traitNameColumn.Add(widget.NewSeparator()) - predictionAccuracyColumn.Add(widget.NewSeparator()) - } - - resultsGrid := container.NewHBox(layout.NewSpacer(), traitNameColumn, predictionAccuracyColumn, layout.NewSpacer()) - - return resultsGrid, nil - } - - resultsGrid, err := getResultsGrid() - if (err != nil){ - setErrorEncounteredPage(window, err, func(){setHomePage(window)}) - return - } - - exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), func(){ - setHomePage(window) - })) - - page := container.NewVBox(title, 
widget.NewSeparator(), description1, description2, exitButton, widget.NewSeparator(), resultsGrid) - - window.SetContent(page) -} - // We use this to define a custom fyne theme // We are only overriding the foreground color to pure black type customTheme struct{