Improved the Create Genetic Models utility and neural network training code. Models are now able to predict traits with some accuracy.

Simon Sarasova 2024-07-05 21:01:43 +00:00
parent 182175948f
commit 75331a22d3
No known key found for this signature in database
GPG key ID: EEDA4103C9C36944
7 changed files with 999 additions and 443 deletions

View file

@@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log.
## Unversioned Changes
* Improved the Create Genetic Models utility and neural network training code. Models are now able to predict traits with some accuracy. - *Simon Sarasova*
* Improved ReadMe.md. - *Simon Sarasova*
* Improved Seekia's slogan and Whitepaper.md. - *Simon Sarasova*
* Added an Estimated Time Remaining label to 2 processes within the Create Genetic Models utility. - *Simon Sarasova*

View file

@@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th
Name | Date Of First Commit | Number Of Commits
--- | --- | ---
Simon Sarasova | June 13, 2023 | 264
Simon Sarasova | June 13, 2023 | 265

View file

@@ -20,11 +20,13 @@ import "gorgonia.org/gorgonia"
import "gorgonia.org/tensor"
import mathRand "math/rand/v2"
import "math"
import "bytes"
import "encoding/gob"
import "slices"
import "errors"
//import "log"
type NeuralNetwork struct{
@@ -35,7 +37,6 @@ type NeuralNetwork struct{
weights1 *gorgonia.Node
weights2 *gorgonia.Node
weights3 *gorgonia.Node
weights4 *gorgonia.Node
// This is the computed prediction
prediction *gorgonia.Node
@@ -97,7 +98,6 @@ type neuralNetworkForEncoding struct{
Weights1 []float32
Weights2 []float32
Weights3 []float32
Weights4 []float32
Weights1Rows int
Weights1Columns int
@@ -105,8 +105,6 @@ type neuralNetworkForEncoding struct{
Weights2Columns int
Weights3Rows int
Weights3Columns int
Weights4Rows int
Weights4Columns int
}
func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, error){
@@ -114,12 +112,10 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte,
weights1 := inputNeuralNetwork.weights1
weights2 := inputNeuralNetwork.weights2
weights3 := inputNeuralNetwork.weights3
weights4 := inputNeuralNetwork.weights4
weights1Slice := weights1.Value().Data().([]float32)
weights2Slice := weights2.Value().Data().([]float32)
weights3Slice := weights3.Value().Data().([]float32)
weights4Slice := weights4.Value().Data().([]float32)
weights1Rows := weights1.Shape()[0]
weights1Columns := weights1.Shape()[1]
@@ -127,14 +123,11 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte,
weights2Columns := weights2.Shape()[1]
weights3Rows := weights3.Shape()[0]
weights3Columns := weights3.Shape()[1]
weights4Rows := weights4.Shape()[0]
weights4Columns := weights4.Shape()[1]
newNeuralNetworkForEncoding := neuralNetworkForEncoding{
Weights1: weights1Slice,
Weights2: weights2Slice,
Weights3: weights3Slice,
Weights4: weights4Slice,
Weights1Rows: weights1Rows,
Weights1Columns: weights1Columns,
@@ -142,8 +135,6 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte,
Weights2Columns: weights2Columns,
Weights3Rows: weights3Rows,
Weights3Columns: weights3Columns,
Weights4Rows: weights4Rows,
Weights4Columns: weights4Columns,
}
buffer := new(bytes.Buffer)
@@ -176,7 +167,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
weights1 := newNeuralNetworkForEncoding.Weights1
weights2 := newNeuralNetworkForEncoding.Weights2
weights3 := newNeuralNetworkForEncoding.Weights3
weights4 := newNeuralNetworkForEncoding.Weights4
weights1Rows := newNeuralNetworkForEncoding.Weights1Rows
weights1Columns := newNeuralNetworkForEncoding.Weights1Columns
@@ -184,8 +174,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
weights2Columns := newNeuralNetworkForEncoding.Weights2Columns
weights3Rows := newNeuralNetworkForEncoding.Weights3Rows
weights3Columns := newNeuralNetworkForEncoding.Weights3Columns
weights4Rows := newNeuralNetworkForEncoding.Weights4Rows
weights4Columns := newNeuralNetworkForEncoding.Weights4Columns
// This is the graph object we add each layer to
newGraph := gorgonia.NewGraph()
@@ -210,7 +198,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
layer1 := getNewNeuralNetworkLayerWeights("Weights1", weights1Rows, weights1Columns, weights1)
layer2 := getNewNeuralNetworkLayerWeights("Weights2", weights2Rows, weights2Columns, weights2)
layer3 := getNewNeuralNetworkLayerWeights("Weights3", weights3Rows, weights3Columns, weights3)
layer4 := getNewNeuralNetworkLayerWeights("Weights4", weights4Rows, weights4Columns, weights4)
newNeuralNetworkObject := NeuralNetwork{
@@ -219,57 +206,204 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
weights1: layer1,
weights2: layer2,
weights3: layer3,
weights4: layer4,
}
return newNeuralNetworkObject, nil
}
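For illustration, a minimal sketch of the gob round-trip that this encode/decode pair performs, using a hypothetical two-field struct rather than Seekia's real neuralNetworkForEncoding:

package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
)

// Hypothetical stand-in for neuralNetworkForEncoding
type weightsForEncoding struct {
	Weights1        []float32
	Weights1Rows    int
	Weights1Columns int
}

func main() {
	original := weightsForEncoding{Weights1: []float32{0.1, 0.2}, Weights1Rows: 1, Weights1Columns: 2}

	// Encode the object to bytes, as EncodeNeuralNetworkObjectToBytes does
	buffer := new(bytes.Buffer)
	err := gob.NewEncoder(buffer).Encode(original)
	if (err != nil) { panic(err) }

	// Decode the bytes back to an object, as DecodeBytesToNeuralNetworkObject does
	var decoded weightsForEncoding
	err = gob.NewDecoder(bytes.NewReader(buffer.Bytes())).Decode(&decoded)
	if (err != nil) { panic(err) }

	fmt.Println(decoded.Weights1, decoded.Weights1Rows, decoded.Weights1Columns)
}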
//Outputs:
// -int: Number of loci values that are known
// -int: Number of loci values that are known and phased
// -int: Number of loci
// -error
func GetLociInfoFromInputLayer(inputLayer []float32)(int, int, int, error){
// Each input layer has 3 neurons for each locus
// Each rsID (locus) is represented by 3 neurons: LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value
// The LocusExists/LocusIsPhased neuron stores information like so:
// -0 = Locus value is unknown
// -0.5 = Locus Is known, phase is unknown
// -1 = Locus Is Known, phase is known
// Each rsID's neurons are concatenated together to form the inputLayer
inputLayerLength := len(inputLayer)
if (inputLayerLength%3 != 0){
return 0, 0, 0, errors.New("GetInputInfoFromInputLayer called with invalid length input layer: Not evenly divisible by 4.")
}
numberOfLoci := len(inputLayer)/3
numberOfLociValuesThatAreKnown := 0
numberOfLociValuesThatAreKnownAndPhased := 0
for index, neuronValue := range inputLayer{
indexRemainder := index%3
if (indexRemainder == 0){
if (neuronValue == 0){
continue
}
numberOfLociValuesThatAreKnown += 1
// We use an inequality instead of == 1 because floats are imprecise
if (neuronValue > 0.99){
numberOfLociValuesThatAreKnownAndPhased += 1
}
}
}
if (numberOfLociValuesThatAreKnown == 0){
return 0, 0, 0, errors.New("GetInputInfoFromInputLayer called with input layer with no known loci values.")
}
return numberOfLociValuesThatAreKnown, numberOfLociValuesThatAreKnownAndPhased, numberOfLoci, nil
}
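As an example of this encoding, a hypothetical helper, assuming it lives in the same package as GetLociInfoFromInputLayer; the allele neuron values follow convertAlleleToNeuron below:

// Sketch, same-package usage of GetLociInfoFromInputLayer.
// Expecting numberOfLociValuesThatAreKnown == 2, numberOfLociValuesThatAreKnownAndPhased == 1, numberOfLoci == 3.
func exampleGetLociInfo()(int, int, int, error){
	// 3 loci, 3 neurons each: {LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value}
	inputLayer := []float32{
		1, 0.32, 0.64, // locus known, phase known ("A"/"G")
		0.5, 0.16, 0.48, // locus known, phase unknown ("C"/"T")
		0, 0, 0, // locus unknown
	}
	return GetLociInfoFromInputLayer(inputLayer)
}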
// This function returns the outcome that a neural network's final output layer describes
// Outputs:
// -string: Output Name (Example: "Blue")
// -error
func GetOutcomeNameFromOutputLayer(traitName string, verifyOutputLayer bool, outputLayer []float32)(string, error){
if (verifyOutputLayer == true){
// We make sure all neurons sum to 1
summedNeurons := float32(0)
for _, neuronValue := range outputLayer{
summedNeurons += neuronValue
}
// We allow a small amount of inaccuracy due to the imprecise nature of floats.
if (summedNeurons > 1.1 || summedNeurons < .99){
summedNeuronsString := helpers.ConvertFloat32ToString(summedNeurons)
return "", errors.New("GetOutcomeNameFromOutputLayer called with layer containing neuron values which don't sum to 1: " + summedNeuronsString)
}
}
getBiggestNeuronIndex := func()int{
biggestNeuronValue := float32(0)
biggestNeuronIndex := 0
for index, neuronValue := range outputLayer{
if (index == 0){
biggestNeuronValue = neuronValue
} else {
if (neuronValue > biggestNeuronValue){
biggestNeuronValue = neuronValue
biggestNeuronIndex = index
}
}
}
return biggestNeuronIndex
}
biggestNeuronIndex := getBiggestNeuronIndex()
switch traitName{
case "Eye Color":{
if (len(outputLayer) != 4){
return "", errors.New("GetOutcomeNameFromOutputLayer called with invalid length output layer.")
}
switch biggestNeuronIndex{
case 0:{
return "Blue", nil
}
case 1:{
return "Green", nil
}
case 2:{
return "Hazel", nil
}
case 3:{
return "Brown", nil
}
}
}
case "Lactose Tolerance":{
if (len(outputLayer) != 2){
return "", errors.New("GetOutcomeNameFromOutputLayer called with invalid length output layer.")
}
switch biggestNeuronIndex{
case 0:{
return "Tolerant", nil
}
case 1:{
return "Intolerant", nil
}
}
}
}
return "", errors.New("GetOutcomeNameFromOutputLayer called with unknown traitName: " + traitName)
}
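A same-package usage sketch, with an illustrative output layer whose neurons sum to 1:

// Sketch: mapping an Eye Color output layer to an outcome name.
// Index 3 holds the biggest neuron value, so "Brown" is the expected result.
func exampleGetOutcomeName()(string, error){
	outputLayer := []float32{0.07, 0.11, 0.21, 0.61}
	return GetOutcomeNameFromOutputLayer("Eye Color", true, outputLayer)
}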
//Outputs:
// -int: Layer 1 neuron count (input layer)
// -int: Layer 2 neuron count
// -int: Layer 3 neuron count
// -int: Layer 4 neuron count
// -int: Layer 5 neuron count (output layer)
// -int: Layer 4 neuron count (output layer)
// -error
func getNeuralNetworkLayerSizes(traitName string)(int, int, int, int, int, error){
func getNeuralNetworkLayerSizes(traitName string)(int, int, int, int, error){
switch traitName{
case "Eye Color":{
// There are 376 input neurons
// There are 282 input neurons
// There are 4 output neurons, each representing a color
// There are 4 colors: Blue, Green, Brown, Hazel
return 376, 200, 100, 50, 4, nil
return 282, 100, 50, 4, nil
}
case "Lactose Tolerance":{
// There are 6 input neurons
// There are 2 output neurons, each representing a tolerance: Tolerant, Intolerant
return 6, 4, 3, 2, nil
}
}
return 0, 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown traitName: " + traitName)
return 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown traitName: " + traitName)
}
//This function converts a genome allele to a neuron to use in a tensor
// A value of 0 means that the allele is unknown
func convertAlleleToNeuron(allele string)(float32, error){
switch allele{
case "C":{
return 0, nil
return 0.16, nil
}
case "A":{
return 0.2, nil
return 0.32, nil
}
case "T":{
return 0.4, nil
return 0.48, nil
}
case "G":{
return 0.6, nil
return 0.64, nil
}
case "I":{
@@ -295,7 +429,7 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
userPhenotypeDataObject readBiobankData.PhenotypeData_OpenSNP,
userLocusValuesMap map[int64]locusValue.LocusValue)(bool, []TrainingData, error){
if (traitName != "Eye Color"){
if (traitName != "Eye Color" && traitName != "Lactose Tolerance"){
return false, nil, errors.New("CreateGeneticPredictionTrainingData_OpenSNP called with unknown traitName: " + traitName)
}
@@ -315,11 +449,15 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
// Each TrainingData holds a variation of the user's genome rsID values
// We add many rows with withheld data to improve training data
numberOfInputLayerRows, _, _, _, numberOfOutputLayerRows, err := getNeuralNetworkLayerSizes(traitName)
numberOfInputLayerRows, _, _, numberOfOutputLayerRows, err := getNeuralNetworkLayerSizes(traitName)
if (err != nil) { return false, nil, err }
// Each rsID is represented by 4 neurons: LocusExists, LocusIsPhased, Allele1 Value, Allele2 Value
expectedNumberOfInputLayerRows := len(traitRSIDs) * 4
// Each rsID is represented by 3 neurons: LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value
// The LocusExists/LocusIsPhased neuron stores information like so:
// -0 = Locus value is unknown
// -0.5 = Locus Is known, phase is unknown
// -1 = Locus Is Known, phase is known
expectedNumberOfInputLayerRows := len(traitRSIDs) * 3
if (numberOfInputLayerRows != expectedNumberOfInputLayerRows){
@@ -365,7 +503,9 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
// -error
getUserTraitValueNeurons := func()(bool, []float32, error){
if (traitName == "Eye Color"){
switch traitName{
case "Eye Color":{
userEyeColorIsKnown := userPhenotypeDataObject.EyeColorIsKnown
if (userEyeColorIsKnown == false){
@@ -393,6 +533,23 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
return false, nil, errors.New("Malformed userPhenotypeDataObject: Invalid eyeColor: " + userEyeColor)
}
case "Lactose Tolerance":{
userLactoseToleranceIsKnown := userPhenotypeDataObject.LactoseToleranceIsKnown
if (userLactoseToleranceIsKnown == false){
return false, nil, nil
}
userLactoseTolerance := userPhenotypeDataObject.LactoseTolerance
if (userLactoseTolerance == true){
return true, []float32{1, 0}, nil
}
return true, []float32{0, 1}, nil
}
}
return false, nil, errors.New("Unknown traitName: " + traitName)
}
@@ -409,6 +566,12 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
return false, nil, errors.New("getUserTraitValueNeurons returning invalid length layer slice.")
}
// We want the initial training data to be the same for each call of this function that has the same input parameters
// This is a necessary step so our neural network models will be reproducible
// Reproducible means that other people can run the code and produce the same models, byte-for-byte
pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))
// We create 110 examples per user.
// We randomize allele order whenever phase for the locus is unknown
// 50% of the time we randomize allele order even when phase is known to train the model on unphased data
@@ -462,33 +625,41 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
probabilityOfUsingLoci := getProbabilityOfUsingLoci()
// In the inputLayer, each locus value is represented by 4 neurons:
// 1. LocusExists (Either 0 or 1)
// 2. LocusIsPhased (Either 0 or 1)
// 3. Allele1 Locus Value (Value between 0-1)
// 4. Allele2 Locus Value (Value between 0-1)
// In the inputLayer, each locus value is represented by 3 neurons:
// 1. LocusExists/LocusIsPhased
// -0 = Locus value is unknown
// -0.5 = Locus Is known, phase is unknown
// -1 = Locus Is Known, phase is known
// 2. Allele1 Locus Value (Value between 0-1)
// -0 = Value is unknown
// 3. Allele2 Locus Value (Value between 0-1)
// -0 = Value is unknown
inputLayerLength := len(traitRSIDsList) * 4
anyLocusExists := false
inputLayerLength := len(traitRSIDsList) * 3
inputLayer := make([]float32, 0, inputLayerLength)
for _, rsID := range traitRSIDsList{
useLocusBool, err := helpers.GetRandomBoolWithProbability(probabilityOfUsingLoci)
if (err != nil) { return false, nil, err }
if (useLocusBool == false){
randomFloat := pseudorandomNumberGenerator.Float64()
if (randomFloat > probabilityOfUsingLoci){
// This if statement has a (1 - probabilityOfUsingLoci) chance of being true.
// We are skipping this locus
inputLayer = append(inputLayer, 0, 0, 0, 0)
inputLayer = append(inputLayer, 0, 0, 0)
continue
}
userLocusValue, exists := userLocusValuesMap[rsID]
if (exists == false){
// This user's locus value is unknown
inputLayer = append(inputLayer, 0, 0, 0, 0)
inputLayer = append(inputLayer, 0, 0, 0)
continue
}
anyLocusExists = true
getLocusAlleles := func()(string, string){
locusAllele1 := userLocusValue.Base1Value
@@ -498,9 +669,11 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
return locusAllele1, locusAllele2
}
randomBool := helpers.GetRandomBool()
// We randomize the phase of the locus
if (randomBool == false){
randomNumber := pseudorandomNumberGenerator.IntN(2)
if (randomNumber == 1){
// This has a 50% chance of being true.
return locusAllele1, locusAllele2
}
@@ -514,10 +687,16 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
locusAllele2NeuronValue, err := convertAlleleToNeuron(locusAllele2)
if (err != nil) { return false, nil, err }
getLocusIsPhasedNeuronValue := func()float32{
getLocusIsKnownAndPhasedNeuronValue := func()float32{
if (locusAllele1 == locusAllele2){
// Phase of locus must be known.
// Swapping the loci would change nothing.
return 1
}
if (randomizePhaseBool == true){
return 0
return 0.5
}
locusIsPhased := userLocusValue.LocusIsPhased
@@ -525,12 +704,18 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
return 1
}
return 0
return 0.5
}
locusIsPhasedNeuronValue := getLocusIsPhasedNeuronValue()
locusIsKnownAndPhasedNeuronValue := getLocusIsKnownAndPhasedNeuronValue()
inputLayer = append(inputLayer, 1, locusIsPhasedNeuronValue, locusAllele1NeuronValue, locusAllele2NeuronValue)
inputLayer = append(inputLayer, locusIsKnownAndPhasedNeuronValue, locusAllele1NeuronValue, locusAllele2NeuronValue)
}
if (anyLocusExists == false){
// We have 0 known loci for this training example.
// We won't add it to the training data.
continue
}
userTraitValueNeuronsCopy := slices.Clone(userTraitValueNeurons)
@@ -548,7 +733,7 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error){
layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, layer5NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
if (err != nil) { return nil, err }
// This is the graph object we add each layer to
@@ -572,8 +757,12 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error)
for i:=0; i < totalNumberOfNeurons; i++{
// This returns a pseudo-random number between 0 and 1
newWeight := pseudorandomNumberGenerator.Float32()
// We initialize the weights with He initialization
// He initialization = 0 +/- sqrt(2/n), where n is the number of nodes in the prior layer
// pseudorandomNumberGenerator.Float32() returns a pseudo-random number between 0 and 1
newWeight := ((pseudorandomNumberGenerator.Float32()-0.5)*2) * float32(math.Sqrt(float64(2)/float64(layerNeuronRows)))
layerInitialWeightsList = append(layerInitialWeightsList, newWeight)
}
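For reference, a self-contained sketch of the uniform He-style initialization used here. The +/- sqrt(2/n) bound matches the code above (textbook He initialization instead draws from a Gaussian with that standard deviation); the seed and sizes are illustrative:

package main

import (
	"fmt"
	"math"
	mathRand "math/rand/v2"
)

// Sketch: uniform He-style weights in [-sqrt(2/n), +sqrt(2/n)],
// where n is the number of neurons in the prior layer.
func newHeStyleWeights(priorLayerNeurons int, totalWeights int) []float32 {
	// A fixed seed keeps the generated models reproducible
	pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))
	scale := float32(math.Sqrt(2 / float64(priorLayerNeurons)))
	weightsList := make([]float32, 0, totalWeights)
	for i := 0; i < totalWeights; i++ {
		// (Float32()-0.5)*2 is uniform in [-1, 1]
		newWeight := ((pseudorandomNumberGenerator.Float32() - 0.5) * 2) * scale
		weightsList = append(weightsList, newWeight)
	}
	return weightsList
}

func main() {
	fmt.Println(newHeStyleWeights(282, 5))
}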
@@ -594,7 +783,6 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error)
layer1 := getNewNeuralNetworkLayerWeights("Weights1", layer1NeuronCount, layer2NeuronCount)
layer2 := getNewNeuralNetworkLayerWeights("Weights2", layer2NeuronCount, layer3NeuronCount)
layer3 := getNewNeuralNetworkLayerWeights("Weights3", layer3NeuronCount, layer4NeuronCount)
layer4 := getNewNeuralNetworkLayerWeights("Weights4", layer4NeuronCount, layer5NeuronCount)
newNeuralNetworkObject := NeuralNetwork{
@@ -603,7 +791,6 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error)
weights1: layer1,
weights2: layer2,
weights3: layer3,
weights4: layer4,
}
return &newNeuralNetworkObject, nil
@@ -616,75 +803,68 @@ func (inputNetwork *NeuralNetwork)getLearnables()gorgonia.Nodes{
weights1 := inputNetwork.weights1
weights2 := inputNetwork.weights2
weights3 := inputNetwork.weights3
weights4 := inputNetwork.weights4
result := gorgonia.Nodes{weights1, weights2, weights3, weights4}
result := gorgonia.Nodes{weights1, weights2, weights3}
return result
}
// This function will train the neural network
// The function is passed a single TrainingData example to train on
//
// TODO: This function doesn't work
// The weights do not change during training
// I think the layer dimensions are wrong?
//
func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, trainingData TrainingData)error{
// The function is passed a batch of TrainingData examples to train on
// Inputs:
// -string: Trait Name
// -*NeuralNetwork
// -func()(bool, bool, TrainingData, error): Function to get the next training data.
// -Outputs:
// -bool: User stopped the training run
// -bool: Another training data exists
// -TrainingData: The next training data example
// -error
// Outputs:
// -bool: Process completed (was not stopped mid-way)
// -error
func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, getNextTrainingData func()(bool, bool, TrainingData, error))(bool, error){
layer1NeuronCount, _, _, _, layer5NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
if (err != nil) { return err }
layer1NeuronCount, _, _, layer4NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
if (err != nil) { return false, err }
neuralNetworkGraph := neuralNetworkObject.graph
// This inputLayer contains the allele values for this training example
trainingDataInputLayer := trainingData.InputLayer
// This outputLayer contains the phenotype for this training example (example: Eye color of Blue)
trainingDataOutputLayer := trainingData.OutputLayer
// We convert our inputTensor and outputTensor to the type *Node
inputTensorShapeObject := tensor.WithShape(1, layer1NeuronCount)
outputTensorShapeObject := tensor.WithShape(1, layer5NeuronCount)
inputTensorBacking := tensor.WithBacking(trainingDataInputLayer)
outputTensorBacking := tensor.WithBacking(trainingDataOutputLayer)
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
outputTensor := tensor.New(outputTensorShapeObject, outputTensorBacking)
// We first create the input and output nodes
// They don't have any values yet.
trainingDataInputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("input"),
gorgonia.WithName("Input"),
gorgonia.WithShape(1, layer1NeuronCount),
gorgonia.WithValue(inputTensor),
)
trainingDataOutputNode := gorgonia.NewMatrix(neuralNetworkGraph,
trainingDataExpectedOutputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("expectedOutput"),
gorgonia.WithShape(1, layer5NeuronCount),
gorgonia.WithValue(outputTensor),
gorgonia.WithName("ExpectedOutput"),
gorgonia.WithShape(1, layer4NeuronCount),
)
err = neuralNetworkObject.prepareToComputePrediction(trainingDataInputNode)
if (err != nil) { return err }
err = neuralNetworkObject.buildNeuralNetwork(trainingDataInputNode)
if (err != nil) { return false, err }
// This computes the loss (how accurate was our prediction)
losses, err := gorgonia.Sub(trainingDataOutputNode, neuralNetworkObject.prediction)
if (err != nil) { return err }
losses, err := gorgonia.Sub(trainingDataExpectedOutputNode, neuralNetworkObject.prediction)
if (err != nil) { return false, err }
// Cost is an average of the losses
cost, err := gorgonia.Mean(losses)
if (err != nil) { return err }
squareOfLosses, err := gorgonia.Square(losses)
if (err != nil) { return false, err }
// Cost is an average of the square of losses
cost, err := gorgonia.Mean(squareOfLosses)
if (err != nil) { return false, err }
neuralNetworkLearnables := neuralNetworkObject.getLearnables()
// Grad takes a scalar cost node and a list of with-regards-to, and returns the gradient
_, err = gorgonia.Grad(cost, neuralNetworkLearnables...)
if (err != nil) { return err }
if (err != nil) { return false, err }
bindDualValues := gorgonia.BindDualValues(neuralNetworkLearnables...)
@@ -692,29 +872,69 @@ func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, tr
virtualMachine := gorgonia.NewTapeMachine(neuralNetworkGraph, bindDualValues)
// This is the learn rate or step size for the solver.
learningRate := gorgonia.WithLearnRate(.001)
learningRate := gorgonia.WithLearnRate(.01)
// This clips the gradient if it gets too crazy
//gradientClip := gorgonia.WithClip(5)
// gradientClip := gorgonia.WithClip(.05)
solver := gorgonia.NewVanillaSolver(learningRate)
// solver := gorgonia.NewVanillaSolver(learningRate, gradientClip)
defer virtualMachine.Close()
for i:=0; i < 10; i++{
for {
userStoppedTraining, nextDataExists, trainingDataObject, err := getNextTrainingData()
if (err != nil) { return false, err }
if (userStoppedTraining == true){
// User manually stopped the training run
return false, nil
}
if (nextDataExists == false){
// We are done training
break
}
// We convert our input training data slices to the type *Dense and assign them to our nodes
// This inputLayer contains the allele values for this training example
trainingDataInputLayer := trainingDataObject.InputLayer
// This outputLayer contains the phenotype for this training example (example: Eye color of Blue)
trainingDataOutputLayer := trainingDataObject.OutputLayer
inputTensorShapeObject := tensor.WithShape(1, layer1NeuronCount)
outputTensorShapeObject := tensor.WithShape(1, layer4NeuronCount)
inputTensorBacking := tensor.WithBacking(trainingDataInputLayer)
outputTensorBacking := tensor.WithBacking(trainingDataOutputLayer)
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
outputTensor := tensor.New(outputTensorShapeObject, outputTensorBacking)
err = gorgonia.Let(trainingDataInputNode, inputTensor)
if (err != nil) { return false, err }
err = gorgonia.Let(trainingDataExpectedOutputNode, outputTensor)
if (err != nil) { return false, err }
// for i:=0; i < 10; i++{
err = virtualMachine.RunAll()
if (err != nil) { return err }
if (err != nil) { return false, err }
// NodesToValueGrads is a utility function that converts a Nodes to a slice of ValueGrad for the solver
valueGrads := gorgonia.NodesToValueGrads(neuralNetworkLearnables)
err := solver.Step(valueGrads)
if (err != nil) { return err }
err = solver.Step(valueGrads)
if (err != nil) { return false, err }
virtualMachine.Reset()
// }
// log.Println(cost.Value())
}
return nil
return true, nil
}
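The cost being minimized is mean squared error: cost = mean((expectedOutput - prediction)^2). A same-package sketch of driving this callback-based loop from an in-memory slice of TrainingData (helper name is illustrative):

// Sketch: feeding TrainNeuralNetwork from a fixed slice of training examples.
func trainFromSlice(traitName string, network *NeuralNetwork, trainingDataList []TrainingData)(bool, error){
	nextIndex := 0
	getNextTrainingData := func()(bool, bool, TrainingData, error){
		if (nextIndex >= len(trainingDataList)){
			// No user stop, and no more training data exists
			return false, false, TrainingData{}, nil
		}
		trainingDataObject := trainingDataList[nextIndex]
		nextIndex += 1
		return false, true, trainingDataObject, nil
	}
	return TrainNeuralNetwork(traitName, network, getNextTrainingData)
}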
@@ -726,27 +946,28 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer
neuralNetworkGraph := inputNeuralNetwork.graph
// We convert the inputLayer []float32 to a node object
numberOfInputNeurons := len(inputLayer)
inputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("Input"),
gorgonia.WithShape(1, numberOfInputNeurons),
)
// We convert the inputLayer []float32 to a tensor *Dense object
inputTensorShapeObject := tensor.WithShape(1, numberOfInputNeurons)
inputTensorBacking := tensor.WithBacking(inputLayer)
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
inputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("input"),
gorgonia.WithShape(1, numberOfInputNeurons),
gorgonia.WithValue(inputTensor),
)
err := inputNeuralNetwork.prepareToComputePrediction(inputNode)
err := gorgonia.Let(inputNode, inputTensor)
if (err != nil) { return nil, err }
prediction := inputNeuralNetwork.prediction
err = inputNeuralNetwork.buildNeuralNetwork(inputNode)
if (err != nil){ return nil, err }
// Now we create a virtual machine to compute the prediction
@@ -759,26 +980,25 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer
err = virtualMachine.RunAll()
if (err != nil) { return nil, err }
prediction := inputNeuralNetwork.prediction
predictionValues := prediction.Value().Data().([]float32)
return predictionValues, nil
}
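A same-package sketch of the full prediction path, combining the decode, raw-prediction, and outcome-name functions above (the trait name and helper name are illustrative):

// Sketch: end-to-end prediction from an encoded model.
func predictOutcome(encodedModel []byte, inputLayer []float32)(string, error){
	neuralNetworkObject, err := DecodeBytesToNeuralNetworkObject(encodedModel)
	if (err != nil) { return "", err }

	rawPrediction, err := GetNeuralNetworkRawPrediction(&neuralNetworkObject, inputLayer)
	if (err != nil) { return "", err }

	outcomeName, err := GetOutcomeNameFromOutputLayer("Eye Color", true, rawPrediction)
	if (err != nil) { return "", err }

	return outcomeName, nil
}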
// This function will take a neural network and input layer and prepare the network to compute a prediction
// We still need to run a virtual machine after calling this function in order for the prediction to be generated
func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgonia.Node)error{
// This function will take a neural network and input layer and build the network to be able to compute a prediction
// We need to run a virtual machine after calling this function in order for the prediction to be generated
func (inputNetwork *NeuralNetwork)buildNeuralNetwork(inputLayer *gorgonia.Node)error{
// We copy pointer (says to do this in a resource I'm reading)
// We copy node pointer (says to do this in a resource I'm reading)
inputLayerCopy := inputLayer
// We multiply weights at each layer and perform rectification (ReLU) after each multiplication
// We multiply weights at each layer, rectify (ReLU) after the first two multiplications, and SoftMax the final output
weights1 := inputNetwork.weights1
weights2 := inputNetwork.weights2
weights3 := inputNetwork.weights3
weights4 := inputNetwork.weights4
layer1Product, err := gorgonia.Mul(inputLayerCopy, weights1)
if (err != nil) {
@@ -787,9 +1007,11 @@ func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgoni
layer1ProductRectified, err := gorgonia.Rectify(layer1Product)
if (err != nil){
return errors.New("Layer 1 rectification failed: " + err.Error())
return errors.New("Layer 1 Rectify failed: " + err.Error())
}
weights2 := inputNetwork.weights2
layer2Product, err := gorgonia.Mul(layer1ProductRectified, weights2)
if (err != nil) {
return errors.New("Layer 2 multiplication failed: " + err.Error())
@@ -797,35 +1019,21 @@ func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgoni
layer2ProductRectified, err := gorgonia.Rectify(layer2Product)
if (err != nil){
return errors.New("Layer 2 rectification failed: " + err.Error())
return errors.New("Layer 2 Rectify failed: " + err.Error())
}
weights3 := inputNetwork.weights3
layer3Product, err := gorgonia.Mul(layer2ProductRectified, weights3)
if (err != nil) {
return errors.New("Layer 3 multiplication failed: " + err.Error())
}
layer3ProductRectified, err := gorgonia.Rectify(layer3Product)
if (err != nil){
return errors.New("Layer 3 rectification failed: " + err.Error())
}
// We SoftMax the output to get the prediction
layer4Product, err := gorgonia.Mul(layer3ProductRectified, weights4)
prediction, err := gorgonia.SoftMax(layer3Product)
if (err != nil) {
return errors.New("Layer 4 multiplication failed: " + err.Error())
}
layer4ProductRectified, err := gorgonia.Rectify(layer4Product)
if (err != nil){
return errors.New("Layer 4 rectification failed: " + err.Error())
}
// We sigmoid the output to get the prediction
//TODO: Use SoftMax instead?
prediction, err := gorgonia.Sigmoid(layer4ProductRectified)
if (err != nil) {
return errors.New("Sigmoid failed: " + err.Error())
return errors.New("SoftMax failed: " + err.Error())
}
inputNetwork.prediction = prediction
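In other words, the network's forward pass is now: prediction = SoftMax(ReLU(ReLU(input · Weights1) · Weights2) · Weights3).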

View file

@@ -120,8 +120,20 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi
locusAllele1 := locusBasePairValue.Allele1
locusAllele2 := locusBasePairValue.Allele2
getLocusIsPhasedBool := func()bool{
if (locusAllele1 == locusAllele2){
// Locus has to be phased, because phase flip does not change value
return true
}
return genomeIsPhased
}
locusIsPhased := getLocusIsPhasedBool()
locusValueObject := locusValue.LocusValue{
LocusIsPhased: genomeIsPhased,
LocusIsPhased: locusIsPhased,
Base1Value: locusAllele1,
Base2Value: locusAllele2,
}
@@ -466,9 +478,20 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi
// The OnlyExcludeConflicts will only omit when there is a tie
// The OnlyIncludeShared requires at least 2 to agree
getLocusIsPhased_OnlyExcludeConflicts := func()bool{
if (locusBase1 == locusBase2){
// These kinds of loci are always phased, because swapping the alleles changes nothing.
return true
}
return phaseIsKnown_OnlyExcludeConflicts
}
locusIsPhased_OnlyExcludeConflicts := getLocusIsPhased_OnlyExcludeConflicts()
onlyExcludeConflictsLocusValue := locusValue.LocusValue{
LocusIsPhased: phaseIsKnown_OnlyExcludeConflicts,
LocusIsPhased: locusIsPhased_OnlyExcludeConflicts,
Base1Value: locusBase1,
Base2Value: locusBase2,
}
@@ -477,8 +500,19 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi
if (mostRecordedSortedBasePairCount >= 2){
getLocusIsPhased_OnlyIncludeShared := func()bool{
if (locusBase1 == locusBase2){
// These kinds of loci are always phased, because swapping the alleles changes nothing.
return true
}
return phaseIsKnown_OnlyIncludeShared
}
locusIsPhased_OnlyIncludeShared := getLocusIsPhased_OnlyIncludeShared()
onlyIncludeSharedLocusValue := locusValue.LocusValue{
LocusIsPhased: phaseIsKnown_OnlyIncludeShared,
LocusIsPhased: locusIsPhased_OnlyIncludeShared,
Base1Value: locusBase1,
Base2Value: locusBase2,
}

View file

@@ -246,6 +246,13 @@ func CeilFloat64ToInt(input float64)(int, error){
return ceiledInt, nil
}
func ConvertFloat32ToString(input float32) string{
result := strconv.FormatFloat(float64(input), 'f', 5, 32)
return result
}
func ConvertFloat64ToString(input float64) string{
result := strconv.FormatFloat(input, 'f', 5, 64)

View file

@@ -125,7 +125,7 @@ func getEyeColorTraitObject()Trait{
TraitDescription: "The color of a person's eyes.",
LociList: eyeColorLociList,
RulesList: []TraitRule{},
OutcomesList: []string{},
OutcomesList: []string{"Blue", "Green", "Hazel", "Brown"},
References: referencesMap,
}

File diff suppressed because it is too large