seekia/internal/genetics/geneticPrediction/geneticPrediction.go
2024-08-14 03:37:18 +00:00

1631 lines
52 KiB
Go

// geneticPrediction provides functions to train and query neural network models
// These models are used to predict attributes such as eye color and autism from user genome files
package geneticPrediction
// I am a novice in the ways of neural networks.
// Machine learning experts should chime in and offer improvements.
// We have to make sure that model inference remains very fast
// Sorting matches by offspring total polygenic disease score will require inference on dozens of models for each match
// We could create slower models that provide more accurate predictions
import "seekia/resources/geneticReferences/polygenicDiseases"
import "seekia/resources/geneticReferences/traits"
import "seekia/resources/geneticPredictionModels"
import "seekia/internal/genetics/locusValue"
import "seekia/internal/genetics/readBiobankData"
import "seekia/internal/helpers"
import "gorgonia.org/gorgonia"
import "gorgonia.org/tensor"
import mathRand "math/rand/v2"
import "math"
import "bytes"
import "encoding/gob"
import "slices"
import "errors"
type NeuralNetwork struct{
// ExprGraph is a data structure for a directed acyclic graph (of expressions).
graph *gorgonia.ExprGraph
// These are the weights for each layer of neurons
weights1 *gorgonia.Node
weights2 *gorgonia.Node
weights3 *gorgonia.Node
// This is the computed prediction
prediction *gorgonia.Node
}
// This struct stores a user's training data
// Each TrainingData represents a single data example
// For example, the InputLayer is a column of neurons representing a user's genetics,
// and the OutputLayer is a column representing their phenotype, such as eye color
type TrainingData struct{
// InputLayer stores relevant rsID values for each attribute from the user's genomes
// It also stores if each rsID is phased and if each rsID exists
InputLayer []float32
// OutputLayer stores user phenotype data as neurons
// Each neuron represents an outcome
// For example, for Eye Color, each neuron represents an eye color
OutputLayer []float32
}
func EncodeTrainingDataObjectToBytes(inputTrainingData TrainingData)([]byte, error){
buffer := new(bytes.Buffer)
encoder := gob.NewEncoder(buffer)
err := encoder.Encode(inputTrainingData)
if (err != nil) { return nil, err }
trainingDataBytes := buffer.Bytes()
return trainingDataBytes, nil
}
func DecodeBytesToTrainingDataObject(inputTrainingData []byte)(TrainingData, error){
if (inputTrainingData == nil){
return TrainingData{}, errors.New("DecodeBytesToTrainingDataObject called with nil inputTrainingData.")
}
buffer := bytes.NewBuffer(inputTrainingData)
decoder := gob.NewDecoder(buffer)
var newTrainingData TrainingData
err := decoder.Decode(&newTrainingData)
if (err != nil){ return TrainingData{}, err }
return newTrainingData, nil
}
// We use this to store a neural network's weights as a .gob file
type neuralNetworkForEncoding struct{
// These are the weights for each layer of neurons
Weights1 []float32
Weights2 []float32
Weights3 []float32
Weights1Rows int
Weights1Columns int
Weights2Rows int
Weights2Columns int
Weights3Rows int
Weights3Columns int
}
func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, error){
weights1 := inputNeuralNetwork.weights1
weights2 := inputNeuralNetwork.weights2
weights3 := inputNeuralNetwork.weights3
weights1Slice := weights1.Value().Data().([]float32)
weights2Slice := weights2.Value().Data().([]float32)
weights3Slice := weights3.Value().Data().([]float32)
weights1Rows := weights1.Shape()[0]
weights1Columns := weights1.Shape()[1]
weights2Rows := weights2.Shape()[0]
weights2Columns := weights2.Shape()[1]
weights3Rows := weights3.Shape()[0]
weights3Columns := weights3.Shape()[1]
newNeuralNetworkForEncoding := neuralNetworkForEncoding{
Weights1: weights1Slice,
Weights2: weights2Slice,
Weights3: weights3Slice,
Weights1Rows: weights1Rows,
Weights1Columns: weights1Columns,
Weights2Rows: weights2Rows,
Weights2Columns: weights2Columns,
Weights3Rows: weights3Rows,
Weights3Columns: weights3Columns,
}
buffer := new(bytes.Buffer)
encoder := gob.NewEncoder(buffer)
err := encoder.Encode(newNeuralNetworkForEncoding)
if (err != nil) { return nil, err }
neuralNetworkBytes := buffer.Bytes()
return neuralNetworkBytes, nil
}
func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork, error){
if (inputNeuralNetwork == nil){
return NeuralNetwork{}, errors.New("DecodeBytesToNeuralNetworkObject called with nil inputNeuralNetwork.")
}
buffer := bytes.NewBuffer(inputNeuralNetwork)
decoder := gob.NewDecoder(buffer)
var newNeuralNetworkForEncoding neuralNetworkForEncoding
err := decoder.Decode(&newNeuralNetworkForEncoding)
if (err != nil){ return NeuralNetwork{}, err }
weights1 := newNeuralNetworkForEncoding.Weights1
weights2 := newNeuralNetworkForEncoding.Weights2
weights3 := newNeuralNetworkForEncoding.Weights3
weights1Rows := newNeuralNetworkForEncoding.Weights1Rows
weights1Columns := newNeuralNetworkForEncoding.Weights1Columns
weights2Rows := newNeuralNetworkForEncoding.Weights2Rows
weights2Columns := newNeuralNetworkForEncoding.Weights2Columns
weights3Rows := newNeuralNetworkForEncoding.Weights3Rows
weights3Columns := newNeuralNetworkForEncoding.Weights3Columns
// This is the graph object we add each layer to
newGraph := gorgonia.NewGraph()
// A layer is a column of neurons
// Each neuron has an initial value between 0 and 1
getNewNeuralNetworkLayerWeights := func(layerName string, layerNeuronRows int, layerNeuronColumns int, layerWeightsList []float32)*gorgonia.Node{
layerNameObject := gorgonia.WithName(layerName)
layerBacking := tensor.WithBacking(layerWeightsList)
layerShape := tensor.WithShape(layerNeuronRows, layerNeuronColumns)
layerTensor := tensor.New(layerBacking, layerShape)
layerValueObject := gorgonia.WithValue(layerTensor)
layerObject := gorgonia.NewMatrix(newGraph, tensor.Float32, layerNameObject, layerValueObject)
return layerObject
}
layer1 := getNewNeuralNetworkLayerWeights("Weights1", weights1Rows, weights1Columns, weights1)
layer2 := getNewNeuralNetworkLayerWeights("Weights2", weights2Rows, weights2Columns, weights2)
layer3 := getNewNeuralNetworkLayerWeights("Weights3", weights3Rows, weights3Columns, weights3)
newNeuralNetworkObject := NeuralNetwork{
graph: newGraph,
weights1: layer1,
weights2: layer2,
weights3: layer3,
}
return newNeuralNetworkObject, nil
}
// This map is used to store information about how accurate genetic prediction models are for discrete traits
// Map Structure: Discrete Trait Outcome Info -> Discrete Trait Prediction Accuracy Info
type DiscreteTraitPredictionAccuracyInfoMap map[DiscreteTraitOutcomeInfo]DiscreteTraitPredictionAccuracyInfo
type DiscreteTraitOutcomeInfo struct{
// This is the outcome which was predicted
// Example: "Blue"
OutcomeName string
// This is a value between 0-100 which describes the percentage of the loci which were tested for the input for the prediction
PercentageOfLociTested int
// This is a value between 0-100 which describes the percentage of the tested loci which were phased for the input for the prediction
PercentageOfPhasedLoci int
}
type DiscreteTraitPredictionAccuracyInfo struct{
// This contains the quantity of examples for the outcome with the specified percentageOfLociTested and percentageOfPhasedLoci
QuantityOfExamples int
// This contains the quantity of predictions for the outcome with the specified percentageOfLociTested and percentageOfPhasedLoci
// Prediction = our model predicted this outcome
QuantityOfPredictions int
// This stores the probability (0-100) that our model will accurately predict this outcome for a genome which has
// the specified percentageOfLociTested and percentageOfPhasedLoci
// In other words: What is the probability that if you give Seekia a blue-eyed genome, it will give you a correct Blue prediction?
// This value is only accurate is QuantityOfExamples > 0
ProbabilityOfCorrectGenomePrediction int
// This stores the probability (0-100) that our model is correct if our model predicts that a genome
// with the specified percentageOfLociTested and percentageOfPhasedLoci has this outcome
// In other words: What is the probability that if Seekia says a genome will have blue eyes, it is correct?
// This value is only accurate is QuantityOfPredictions > 0
ProbabilityOfCorrectOutcomePrediction int
}
func EncodeDiscreteTraitPredictionAccuracyInfoMapToBytes(inputMap DiscreteTraitPredictionAccuracyInfoMap)([]byte, error){
buffer := new(bytes.Buffer)
encoder := gob.NewEncoder(buffer)
err := encoder.Encode(inputMap)
if (err != nil) { return nil, err }
inputMapBytes := buffer.Bytes()
return inputMapBytes, nil
}
func DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap(inputBytes []byte)(DiscreteTraitPredictionAccuracyInfoMap, error){
if (inputBytes == nil){
return nil, errors.New("DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap called with nil inputBytes.")
}
buffer := bytes.NewBuffer(inputBytes)
decoder := gob.NewDecoder(buffer)
var newDiscreteTraitPredictionAccuracyInfoMap DiscreteTraitPredictionAccuracyInfoMap
err := decoder.Decode(&newDiscreteTraitPredictionAccuracyInfoMap)
if (err != nil){ return nil, err }
return newDiscreteTraitPredictionAccuracyInfoMap, nil
}
type NumericAttributePredictionAccuracyInfoMap map[NumericAttributePredictionInfo]NumericAttributePredictionAccuracyRangesMap
type NumericAttributePredictionInfo struct{
// This is a value between 0-100 which describes the percentage of the loci which were tested for the input for the prediction
PercentageOfLociTested int
// This is a value between 0-100 which describes the percentage of the tested loci which were phased for the input for the prediction
PercentageOfPhasedLoci int
}
// Map Structure: Accuracy Percentage (AP) -> Amount needed to deviate from prediction for the value to be accurate (AP)% of the time
// For example, if the model predicted that someone was 150 centimeters tall, how many centimeters would we have to deviate in both directions
// in order for the true outcome to fall into the range 10% of the time, 20% of the time, 30% of the time, etc...
// Example:
// -90%+: 50 centimeters
// If you travel 50 centimeters in both directions from the prediction,
// the true height value will fall into this range 90% of the time.
// -50%+: 20 centimeters
// -10%+: 10 centimeters
type NumericAttributePredictionAccuracyRangesMap map[int]float64
func EncodeNumericAttributePredictionAccuracyInfoMapToBytes(inputMap NumericAttributePredictionAccuracyInfoMap)([]byte, error){
buffer := new(bytes.Buffer)
encoder := gob.NewEncoder(buffer)
err := encoder.Encode(inputMap)
if (err != nil) { return nil, err }
inputMapBytes := buffer.Bytes()
return inputMapBytes, nil
}
func DecodeBytesToNumericAttributePredictionAccuracyInfoMap(inputBytes []byte)(NumericAttributePredictionAccuracyInfoMap, error){
if (inputBytes == nil){
return nil, errors.New("DecodeBytesToNumericAttributePredictionAccuracyInfoMap called with nil inputBytes.")
}
buffer := bytes.NewBuffer(inputBytes)
decoder := gob.NewDecoder(buffer)
var newNumericAttributePredictionAccuracyInfoMap NumericAttributePredictionAccuracyInfoMap
err := decoder.Decode(&newNumericAttributePredictionAccuracyInfoMap)
if (err != nil){ return nil, err }
return newNumericAttributePredictionAccuracyInfoMap, nil
}
//Outputs:
// -bool: Neural network model exists for this trait (trait prediction is possible for this trait)
// -bool: Trait prediction is possible for this user (User has at least 1 known trait locus value)
// -string: Predicted trait outcome (Example: "Blue")
// -int: Confidence: Probability (0-100) that the prediction is accurate
// -int: Quantity of loci known
// -int: Quantity of phased loci
// -error
func GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap(traitName string, genomeMap map[int64]locusValue.LocusValue)(bool, bool, string, int, int, int, error){
traitObject, err := traits.GetTraitObject(traitName)
if (err != nil) { return false, false, "", 0, 0, 0, err }
traitIsDiscreteOrNumeric := traitObject.DiscreteOrNumeric
if (traitIsDiscreteOrNumeric != "Discrete"){
return false, false, "", 0, 0, 0, errors.New("GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap called with non-discrete trait: " + traitName)
}
// This is a map of rsIDs which influence this trait
traitRSIDsList := traitObject.LociList
if (len(traitRSIDsList) == 0){
// Neural network trait prediction is not possible for this trait
return false, false, "", 0, 0, 0, nil
}
predictionModelExists, predictionModelBytes := geneticPredictionModels.GetGeneticPredictionModelBytes(traitName)
if (predictionModelExists == false){
// Neural network trait prediction is not possible for this trait
return false, false, "", 0, 0, 0, nil
}
traitRSIDsListCopy := slices.Clone(traitRSIDsList)
slices.Sort(traitRSIDsListCopy)
neuralNetworkInput, quantityOfLociKnown, quantityOfPhasedLoci, err := createInputNeuralNetworkLayerFromGenomeMap(traitRSIDsListCopy, genomeMap)
if (err != nil) { return false, false, "", 0, 0, 0, err }
if (quantityOfLociKnown == 0){
// We can't predict anything about this trait for this genome
return true, false, "", 0, 0, 0, nil
}
neuralNetworkObject, err := DecodeBytesToNeuralNetworkObject(predictionModelBytes)
if (err != nil) { return false, false, "", 0, 0, 0, err }
outputLayer, err := GetNeuralNetworkRawPrediction(&neuralNetworkObject, false, neuralNetworkInput)
if (err != nil) { return false, false, "", 0, 0, 0, err }
predictedOutcomeName, err := GetDiscreteOutcomeNameFromOutputLayer(traitName, false, outputLayer)
if (err != nil) { return false, false, "", 0, 0, 0, err }
modelTraitAccuracyInfoFile, err := geneticPredictionModels.GetPredictionModelDiscreteTraitAccuracyInfoBytes(traitName)
if (err != nil) { return false, false, "", 0, 0, 0, err }
modelTraitAccuracyInfoMap, err := DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap(modelTraitAccuracyInfoFile)
if (err != nil) { return false, false, "", 0, 0, 0, err }
// We find the model trait accuracy info object that is the most similar to our predicted outcome
getPredictionAccuracy := func()int{
totalNumberOfTraitLoci := len(traitRSIDsList)
proportionOfLociTested := float64(quantityOfLociKnown)/float64(totalNumberOfTraitLoci)
percentageOfLociTested := int(proportionOfLociTested * 100)
proportionOfPhasedLoci := float64(quantityOfPhasedLoci)/float64(totalNumberOfTraitLoci)
percentageOfPhasedLoci := int(proportionOfPhasedLoci * 100)
// This is a value between 0 and 100 that represents the most likely accuracy probability for this prediction
closestPredictionAccuracy := 0
// This is a value that represents the distance our closest prediction accuracy has from the current prediction
// Consider each prediction accuracy value on an (X,Y) coordinate plane
// X = Number of loci tested
// Y = Number of phased loci
closestPredictionAccuracyDistance := float64(0)
anyOutcomeAccuracyFound := false
for traitOutcomeInfo, traitPredictionAccuracyInfo := range modelTraitAccuracyInfoMap{
outcomeName := traitOutcomeInfo.OutcomeName
if (outcomeName != predictedOutcomeName){
continue
}
probabilityOfCorrectOutcomePrediction := traitPredictionAccuracyInfo.ProbabilityOfCorrectOutcomePrediction
currentPercentageOfLociTested := traitOutcomeInfo.PercentageOfLociTested
currentPercentageOfPhasedLoci := traitOutcomeInfo.PercentageOfPhasedLoci
// Distance Formula for 2 coordinates (x1, y1) and (x2, y2):
// distance = √((x2 - x1)^2 + (y2 - y1)^2)
differenceInX := float64(currentPercentageOfLociTested - percentageOfLociTested)
differenceInY := float64(currentPercentageOfPhasedLoci - percentageOfPhasedLoci)
distance := math.Sqrt(math.Pow(differenceInX, 2) + math.Pow(differenceInY, 2))
if (distance == 0){
// We found the exact prediction accuracy
return probabilityOfCorrectOutcomePrediction
}
if (anyOutcomeAccuracyFound == false){
closestPredictionAccuracyDistance = distance
closestPredictionAccuracy = probabilityOfCorrectOutcomePrediction
anyOutcomeAccuracyFound = true
continue
} else {
if (distance < closestPredictionAccuracyDistance){
closestPredictionAccuracyDistance = distance
closestPredictionAccuracy = probabilityOfCorrectOutcomePrediction
}
}
}
if (anyOutcomeAccuracyFound == false){
// This means that our model has never actually predicted this outcome
// This shouldn't happen unless our model is really bad, or our training set has very few people with this outcome.
// We return a 0% accuracy rating
return 0
}
return closestPredictionAccuracy
}
predictionAccuracy := getPredictionAccuracy()
return true, true, predictedOutcomeName, predictionAccuracy, quantityOfLociKnown, quantityOfPhasedLoci, nil
}
// This function is used to predict numeric traits and polygenic disease risk scores
//Outputs:
// -bool: Neural network model exists for this attribute (neural network prediction is possible for this attribute)
// -bool: Attribute prediction is possible for this user (User has at least 1 known attribute locus value)
// -float64: Predicted attribute outcome (Example: Height in centimeters)
// -map[int]float64: Accuracy ranges map
// -Map Structure: Probability prediction is accurate (X) -> Distance from prediction that must be travelled in both directions to
// create a range in which the true value will fall into, X% of the time
// -int: Quantity of loci known
// -int: Quantity of phased loci
// -error
func GetNeuralNetworkNumericAttributePredictionFromGenomeMap(attributeName string, attributeLociList []int64, genomeMap map[int64]locusValue.LocusValue)(bool, bool, float64, map[int]float64, int, int, error){
predictionModelExists, predictionModelBytes := geneticPredictionModels.GetGeneticPredictionModelBytes(attributeName)
if (predictionModelExists == false){
// Prediction is not possible for this attribute
return false, false, 0, nil, 0, 0, nil
}
if (len(attributeLociList) == 0){
return false, false, 0, nil, 0, 0, errors.New("GetNeuralNetworkNumericAttributePredictionFromGenomeMap called with empty attributeLociList for attribute with an existing neural network.")
}
attributeLociListCopy := slices.Clone(attributeLociList)
slices.Sort(attributeLociListCopy)
neuralNetworkInput, quantityOfLociKnown, quantityOfPhasedLoci, err := createInputNeuralNetworkLayerFromGenomeMap(attributeLociListCopy, genomeMap)
if (err != nil) { return false, false, 0, nil, 0, 0, err }
if (quantityOfLociKnown == 0){
// We can't predict anything about this attribute for this genome
return true, false, 0, nil, 0, 0, nil
}
neuralNetworkObject, err := DecodeBytesToNeuralNetworkObject(predictionModelBytes)
if (err != nil) { return false, false, 0, nil, 0, 0, err }
outputLayer, err := GetNeuralNetworkRawPrediction(&neuralNetworkObject, true, neuralNetworkInput)
if (err != nil) { return false, false, 0, nil, 0, 0, err }
predictedOutcomeValue, err := GetNumericOutcomeValueFromOutputLayer(attributeName, outputLayer)
if (err != nil) { return false, false, 0, nil, 0, 0, err }
modelAccuracyInfoFile, err := geneticPredictionModels.GetPredictionModelNumericAttributeAccuracyInfoBytes(attributeName)
if (err != nil) { return false, false, 0, nil, 0, 0, err }
modelAccuracyInfoMap, err := DecodeBytesToNumericAttributePredictionAccuracyInfoMap(modelAccuracyInfoFile)
if (err != nil) { return false, false, 0, nil, 0, 0, err }
// We create a prediction confidence ranges map for our prediction
getPredictionConfidenceRangesMap := func()map[int]float64{
totalNumberOfAttributeLoci := len(attributeLociListCopy)
proportionOfLociTested := float64(quantityOfLociKnown)/float64(totalNumberOfAttributeLoci)
percentageOfLociTested := int(proportionOfLociTested * 100)
proportionOfPhasedLoci := float64(quantityOfPhasedLoci)/float64(totalNumberOfAttributeLoci)
percentageOfPhasedLoci := int(proportionOfPhasedLoci * 100)
// This is a value between 0 and 100 that represents the most similar confidence ranges map for this prediction
var closestPredictionConfidenceRangesMap map[int]float64
// This is a value that represents the distance our closest prediction confidence ranges map has from the current prediction
// Consider each prediction accuracy value on an (X,Y) coordinate plane
// X = Number of loci tested
// Y = Number of phased loci
closestPredictionConfidenceRangesMapDistance := float64(0)
for attributeOutcomeInfo, attributePredictionConfidenceRangesMap := range modelAccuracyInfoMap{
currentPercentageOfLociTested := attributeOutcomeInfo.PercentageOfLociTested
currentPercentageOfPhasedLoci := attributeOutcomeInfo.PercentageOfPhasedLoci
// Distance Formula for 2 coordinates (x1, y1) and (x2, y2):
// distance = √((x2 - x1)^2 + (y2 - y1)^2)
differenceInX := float64(currentPercentageOfLociTested - percentageOfLociTested)
differenceInY := float64(currentPercentageOfPhasedLoci - percentageOfPhasedLoci)
distance := math.Sqrt(math.Pow(differenceInX, 2) + math.Pow(differenceInY, 2))
if (distance == 0){
// We found the exact prediction confidence ranges map
return attributePredictionConfidenceRangesMap
}
if (closestPredictionConfidenceRangesMap == nil || distance < closestPredictionConfidenceRangesMapDistance){
closestPredictionConfidenceRangesMapDistance = distance
closestPredictionConfidenceRangesMap = attributePredictionConfidenceRangesMap
}
}
return closestPredictionConfidenceRangesMap
}
predictionConfidenceRangesMap := getPredictionConfidenceRangesMap()
return true, true, predictedOutcomeValue, predictionConfidenceRangesMap, quantityOfLociKnown, quantityOfPhasedLoci, nil
}
//Outputs:
// -[]float32: Input layer for neural network
// -int: Quantity of known loci
// -int: Quantity of phased loci
// -error
func createInputNeuralNetworkLayerFromGenomeMap(rsidsList []int64, genomeMap map[int64]locusValue.LocusValue)([]float32, int, int, error){
// In the inputLayer, each locus value is represented by 3 neurons:
// 1. LocusExists/LocusIsPhased
// -0 = Locus value is unknown
// -0.5 = Locus Is known, phase is unknown
// -1 = Locus Is Known, phase is known
// 2. Allele1 Locus Value (Value between 0-1)
// -0 = Value is unknown
// 3. Allele2 Locus Value (Value between 0-1)
// -0 = Value is unknown
//
neuralNetworkInput := make([]float32, 0)
quantityOfLociKnown := 0
quantityOfPhasedLoci := 0
for _, rsID := range rsidsList{
userLocusValue, exists := genomeMap[rsID]
if (exists == false){
neuralNetworkInput = append(neuralNetworkInput, 0, 0, 0)
continue
}
quantityOfLociKnown += 1
locusAllele1 := userLocusValue.Base1Value
locusAllele2 := userLocusValue.Base2Value
locusIsPhased := userLocusValue.LocusIsPhased
getNeuron1 := func()float32{
if (locusIsPhased == false){
return 0.5
}
quantityOfPhasedLoci += 1
return 1
}
neuron1 := getNeuron1()
neuron2, err := convertAlleleToNeuron(locusAllele1)
if (err != nil) { return nil, 0, 0, err }
neuron3, err := convertAlleleToNeuron(locusAllele2)
if (err != nil) { return nil, 0, 0, err }
neuralNetworkInput = append(neuralNetworkInput, neuron1, neuron2, neuron3)
}
return neuralNetworkInput, quantityOfLociKnown, quantityOfLociKnown, nil
}
//Outputs:
// -int: Number of loci values that are known
// -int: Number of loci values that are known and phased
// -int: Number of loci
// -error
func GetLociInfoFromNetworkInputLayer(inputLayer []float32)(int, int, int, error){
// Each input layer has 3 neurons for each locus
// Each rsID (locus) is represented by 3 neurons: LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value
// The LocusExists/LocusIsPhased neuron stores information like so:
// -0 = Locus value is unknown
// -0.5 = Locus Is known, phase is unknown
// -1 = Locus Is Known, phase is known
// Each rsID's neurons are concatenated together to form the inputLayer
inputLayerLength := len(inputLayer)
if (inputLayerLength%3 != 0){
return 0, 0, 0, errors.New("GetLociInfoFromNetworkInputLayer called with invalid length input layer: Not evenly divisible by 4.")
}
numberOfLoci := len(inputLayer)/3
numberOfLociValuesThatAreKnown := 0
numberOfLociValuesThatAreKnownAndPhased := 0
for index, neuronValue := range inputLayer{
indexRemainder := index%3
if (indexRemainder == 0){
if (neuronValue == 0){
continue
}
numberOfLociValuesThatAreKnown += 1
/// We use an inequality instead of ==1 because floats are imprecise
if (neuronValue > 0.99){
numberOfLociValuesThatAreKnown += 1
numberOfLociValuesThatAreKnownAndPhased += 1
}
}
}
if (numberOfLociValuesThatAreKnown == 0){
return 0, 0, 0, errors.New("GetLociInfoFromNetworkInputLayer called with input layer with no known loci values.")
}
return numberOfLociValuesThatAreKnown, numberOfLociValuesThatAreKnownAndPhased, numberOfLoci, nil
}
// This function returns which outcome is being described from a neural network's final output layer
// This is only used for discrete traits
// Outputs:
// -string: Output Name (Example: "Blue")
// -error
func GetDiscreteOutcomeNameFromOutputLayer(traitName string, verifyOutputLayer bool, outputLayer []float32)(string, error){
if (verifyOutputLayer == true){
// We make sure all neurons sum to 1
summedNeurons := float32(0)
for _, neuronValue := range outputLayer{
summedNeurons += neuronValue
}
// We allow a small amount of inaccuracy due to the imprecise nature of floats.
if (summedNeurons > 1.01 || summedNeurons < .99){
summedNeuronsString := helpers.ConvertFloat32ToString(summedNeurons)
return "", errors.New("GetOutcomeNameFromOutputLayer called with layer containing neuron values which don't sum to 1: " + summedNeuronsString)
}
}
getBiggestNeuronIndex := func()int{
biggestNeuronValue := float32(0)
biggestNeuronIndex := 0
for index, neuronValue := range outputLayer{
if (index == 0){
biggestNeuronValue = neuronValue
} else {
if (neuronValue > biggestNeuronValue){
biggestNeuronValue = neuronValue
biggestNeuronIndex = index
}
}
}
return biggestNeuronIndex
}
biggestNeuronIndex := getBiggestNeuronIndex()
switch traitName{
case "Eye Color":{
if (len(outputLayer) != 4){
return "", errors.New("GetOutcomeNameFromOutputLayer called with invalid length output layer.")
}
switch biggestNeuronIndex{
case 0:{
return "Blue", nil
}
case 1:{
return "Green", nil
}
case 2:{
return "Hazel", nil
}
case 3:{
return "Brown", nil
}
}
}
case "Lactose Tolerance":{
if (len(outputLayer) != 2){
return "", errors.New("GetOutcomeNameFromOutputLayer called with invalid length output layer.")
}
switch biggestNeuronIndex{
case 0:{
return "Tolerant", nil
}
case 1:{
return "Intolerant", nil
}
}
}
}
return "", errors.New("GetOutcomeNameFromOutputLayer called with unknown traitName: " + traitName)
}
// This function returns which outcome is being described from a neural network's final output layer
// This is only used for numeric traits and polygenic diseases
// Outputs:
// -float64: Output Value (example: 150 centimeters)
// -error
func GetNumericOutcomeValueFromOutputLayer(attributeName string, outputLayer []float32)(float64, error){
if (len(outputLayer) != 1){
return 0, errors.New("GetNumericOutcomeValueFromOutputLayer called with output layer which is not length of 1")
}
outputNeuron := outputLayer[0]
if (outputNeuron < 0 || outputNeuron > 1){
return 0, errors.New("GetNumericOutcomeValueFromOutputLayer called with output layer contains out-of-bounds neuron")
}
getOutcomeMinAndMax := func()(float64, float64, error){
switch attributeName{
case "Height":{
// Shortest person of all time: 54 cm
// Tallest person of all time: 272 cm
return 54, 272, nil
}
case "Autism",
"Homosexualness",
"Obesity":{
return 0, 10, nil
}
}
return 0, 0, errors.New("GetNumericOutcomeValueFromOutputLayer called with unknown attributeName: " + attributeName)
}
outcomeMin, outcomeMax, err := getOutcomeMinAndMax()
if (err != nil) { return 0, err }
outcomeValue, err := helpers.ScaleFloat64Proportionally(true, float64(outputNeuron), 0, 1, outcomeMin, outcomeMax)
if (err != nil) { return 0, err }
return outcomeValue, nil
}
//Outputs:
// -int: Layer 1 neuron count (input layer)
// -int: Layer 2 neuron count
// -int: Layer 3 neuron count
// -int: Layer 4 neuron count (output layer)
// -error
func getNeuralNetworkLayerSizes(attributeName string)(int, int, int, int, error){
switch attributeName{
case "Eye Color":{
// There are 282 input neurons
// There are 4 output neurons, each representing a color
// There are 4 colors: Blue, Green, Brown, Hazel
return 282, 100, 50, 4, nil
}
case "Lactose Tolerance":{
// There are 6 input neurons
// There are 2 output neurons, each representing a tolerance: Tolerant, Intolerant
return 6, 4, 3, 2, nil
}
case "Height":{
// There are 3000 input neurons
// There is 1 output neuron, representing a height value
return 3000, 3, 2, 1, nil
}
case "Autism":{
// There are 1428 input neurons
// There is 1 output neuron, representing an autism value
return 1428, 3, 2, 1, nil
}
case "Homosexualness":{
// There are 12 input neurons
// There is 1 output neuron, representing a homosexualness value
return 12, 10, 5, 1, nil
}
case "Obesity":{
// There are 3000 input neurons
// There is 1 output neuron, representing an obesity value
return 3000, 3, 2, 1, nil
}
}
return 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown attributeName: " + attributeName)
}
//This function converts a genome allele to a neuron to use in a tensor
// A value of 0 means that the allele is unknown
func convertAlleleToNeuron(allele string)(float32, error){
switch allele{
case "C":{
return 0.16, nil
}
case "A":{
return 0.32, nil
}
case "T":{
return 0.48, nil
}
case "G":{
return 0.64, nil
}
case "I":{
return 0.8, nil
}
case "D":{
return 1, nil
}
}
return 0, errors.New("convertAlleleToNeuron called with invalid allele: " + allele)
}
// This function returns training data to use to train each neural network prediction model
// Outputs:
// -bool: User has phenotype data and enough loci to train model
// -[]TrainingData: List of TrainingData for the user which we will use to train the model
// -error
func CreateGeneticPredictionTrainingData_OpenSNP(
attributeName string,
userPhenotypeDataObject readBiobankData.PhenotypeData_OpenSNP,
userLocusValuesMap map[int64]locusValue.LocusValue)(bool, []TrainingData, error){
//Outputs:
// -[]int64: Attribute rsIDs list
// -error
getAttributeLociList := func()([]int64, error){
switch attributeName{
case "Eye Color",
"Lactose Tolerance",
"Height",
"Homosexualness":{
traitObject, err := traits.GetTraitObject(attributeName)
if (err != nil) { return nil, err }
// This is a list of rsIDs which influence this trait
traitLociList := traitObject.LociList
return traitLociList, nil
}
case "Autism",
"Obesity":{
diseaseObject, err := polygenicDiseases.GetPolygenicDiseaseObject(attributeName)
if (err != nil) { return nil, err }
// This is a list of rsIDs which influence this disease
diseaseLociList := diseaseObject.LociList
return diseaseLociList, nil
}
}
return nil, errors.New("CreateGeneticPredictionTrainingData_OpenSNP called with unknown attributeName: " + attributeName)
}
attributeLociList, err := getAttributeLociList()
if (err != nil) { return false, nil, err }
if (len(attributeLociList) == 0){
return false, nil, errors.New("getAttributeLociList returning empty attributeLociList.")
}
// Each layer is represented as a []float32
// Each float is a value between 0 and 1
//
// Each TrainingData holds a variation of the user's genome rsID values
// We add many rows with withheld data to improve training data
numberOfInputLayerRows, _, _, numberOfOutputLayerRows, err := getNeuralNetworkLayerSizes(attributeName)
if (err != nil) { return false, nil, err }
// Each rsID is represented by 3 neurons: LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value
// The LocusExists/LocusIsPhased neuron stores information like so:
// -0 = Locus value is unknown
// -0.5 = Locus Is known, phase is unknown
// -1 = Locus Is Known, phase is known
expectedNumberOfInputLayerRows := len(attributeLociList) * 3
if (numberOfInputLayerRows != expectedNumberOfInputLayerRows){
expectedNumberOfInputLayerRowsString := helpers.ConvertIntToString(expectedNumberOfInputLayerRows)
return false, nil, errors.New("numberOfInputLayerRows is not expected: " + expectedNumberOfInputLayerRowsString)
}
checkIfAnyAttributeLocusValuesExist := func()bool{
for _, rsID := range attributeLociList{
_, exists := userLocusValuesMap[rsID]
if (exists == true){
return true
}
}
return false
}
anyAttributeLocusValuesExist := checkIfAnyAttributeLocusValuesExist()
if (anyAttributeLocusValuesExist == false){
// The user's genome does not contain any of this attribute's locus values
// We will not train on their data
return false, nil, nil
}
// We sort rsIDs in ascending order
attributeLociListCopy := slices.Clone(attributeLociList)
slices.Sort(attributeLociListCopy)
// This function returns the outputLayer for all trainingDatas for this user
// Each outputLayer represents the user's attribute value (Example: "Blue" for Eye Color)
// Each outputLayer is identical, because each TrainingData example belongs to the same user
//
// Outputs:
// -bool: User attribute value is known
// -[]float32: Neuron values for layer
// -error
getUserAttributeValueNeurons := func()(bool, []float32, error){
switch attributeName{
case "Eye Color":{
userEyeColorIsKnown := userPhenotypeDataObject.EyeColorIsKnown
if (userEyeColorIsKnown == false){
return false, nil, nil
}
userEyeColor := userPhenotypeDataObject.EyeColor
if (userEyeColor == "Blue"){
return true, []float32{1, 0, 0, 0}, nil
} else if (userEyeColor == "Green"){
return true, []float32{0, 1, 0, 0}, nil
} else if (userEyeColor == "Hazel"){
return true, []float32{0, 0, 1, 0}, nil
} else if (userEyeColor == "Brown"){
return true, []float32{0, 0, 0, 1}, nil
}
return false, nil, errors.New("Malformed userPhenotypeDataObject: Invalid eyeColor: " + userEyeColor)
}
case "Lactose Tolerance":{
userLactoseToleranceIsKnown := userPhenotypeDataObject.LactoseToleranceIsKnown
if (userLactoseToleranceIsKnown == false){
return false, nil, nil
}
userLactoseTolerance := userPhenotypeDataObject.LactoseTolerance
if (userLactoseTolerance == true){
return true, []float32{1, 0}, nil
}
return true, []float32{0, 1}, nil
}
case "Height":{
userHeightIsKnown := userPhenotypeDataObject.HeightIsKnown
if (userHeightIsKnown == false){
return false, nil, nil
}
userHeight := userPhenotypeDataObject.Height
// Shortest person of all time: 54 cm
// Tallest person of all time: 272 cm
outputValue, err := helpers.ScaleFloat64Proportionally(true, userHeight, 54, 272, 0, 1)
if (err != nil) { return false, nil, err }
outputValueFloat32 := float32(outputValue)
outputLayer := []float32{outputValueFloat32}
return true, outputLayer, nil
}
case "Autism":{
userAutismIsKnown := userPhenotypeDataObject.AutismIsKnown
if (userAutismIsKnown == false){
return false, nil, nil
}
userAutism := userPhenotypeDataObject.Autism
outputValueFloat32 := float32(userAutism)
outputLayer := []float32{outputValueFloat32}
return true, outputLayer, nil
}
case "Homosexualness":{
userHomosexualnessIsKnown := userPhenotypeDataObject.HomosexualnessIsKnown
if (userHomosexualnessIsKnown == false){
return false, nil, nil
}
userHomosexualness := userPhenotypeDataObject.Homosexualness
outputValueFloat32 := float32(userHomosexualness)
outputLayer := []float32{outputValueFloat32}
return true, outputLayer, nil
}
case "Obesity":{
userObesityIsKnown := userPhenotypeDataObject.ObesityIsKnown
if (userObesityIsKnown == false){
return false, nil, nil
}
userObesity := userPhenotypeDataObject.Obesity
outputValueFloat32 := float32(userObesity)
outputLayer := []float32{outputValueFloat32}
return true, outputLayer, nil
}
}
return false, nil, errors.New("Unknown attributeName: " + attributeName)
}
userAttributeValueExists, userAttributeValueNeurons, err := getUserAttributeValueNeurons()
if (err != nil) { return false, nil, err }
if (userAttributeValueExists == false){
// User cannot be used to train the model.
// They do not have a value for this attribute.
return false, nil, nil
}
if (len(userAttributeValueNeurons) != numberOfOutputLayerRows){
return false, nil, errors.New("getUserAttributeValueNeurons returning invalid length layer slice.")
}
// We want the initial training data to be the same for each call of this function that has the same input parameters
// This is a necessary step so our neural network models will be reproducable
// Reproducable means that other people can run the code and produce the same models, byte-for-byte
pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))
// We create 110 examples per user.
// We randomize allele order whenever phase for the locus is unknown
// 50% of the time we randomize allele order even when phase is known to train the model on unphased data
// Unphased data is data where the order each allele (Example: G;A) has no meaning, because phase data was not captured
// We randomize allele order to simulate unphased data
// For example, if a user's genome is phased, we will randomize the base pair order and set the LocusIsPhased neuron to false
//
// Examples 0-10: 100% of the user's loci are used
// Examples 11-30: 90% of the user's loci are used
// Examples 31-50: 70% of the user's loci are used
// Examples 51-70: 50% of the user's loci are used
// Examples 71-90: 30% of the user's loci are used
// Examples 91-110: 10% of the user's loci are used
// We now add this user's data to the trainingDataList
trainingDataList := make([]TrainingData, 0, 110)
for i:=0; i < 110; i++{
getRandomizePhaseBool := func()bool{
if (i%2 == 0){
return true
}
return false
}
randomizePhaseBool := getRandomizePhaseBool()
getProbabilityOfUsingLoci := func()float64{
if (i <= 10){
return 1
} else if (i <= 30){
return 0.9
} else if (i <= 50){
return 0.7
} else if (i <= 70){
return 0.5
} else if (i <= 90){
return 0.3
}
return 0.1
}
probabilityOfUsingLoci := getProbabilityOfUsingLoci()
// In the inputLayer, each locus value is represented by 3 neurons:
// 1. LocusExists/LocusIsPhased
// -0 = Locus value is unknown
// -0.5 = Locus Is known, phase is unknown
// -1 = Locus Is Known, phase is known
// 2. Allele1 Locus Value (Value between 0-1)
// -0 = Value is unknown
// 3. Allele2 Locus Value (Value between 0-1)
// -0 = Value is unknown
anyLocusExists := false
inputLayerLength := len(attributeLociListCopy) * 3
inputLayer := make([]float32, 0, inputLayerLength)
for _, rsID := range attributeLociListCopy{
randomFloat := pseudorandomNumberGenerator.Float64()
if (randomFloat > probabilityOfUsingLoci){
// This if statement has a !probabilityOfUsingLoci chance of being true.
// We are skipping this locus
inputLayer = append(inputLayer, 0, 0, 0)
continue
}
userLocusValue, exists := userLocusValuesMap[rsID]
if (exists == false){
// This user's locus value is unknown
inputLayer = append(inputLayer, 0, 0, 0)
continue
}
anyLocusExists = true
//Outputs:
// -float32: Final neuron value
// -0 == Value is unknown
// -0.5 == Value is known, phase is unknown
// -1 == Value is known, phase is known
// -string: Allele 1 value
// -string: Allele 2 value
getFirstNeuronAndLocusAlleles := func()(float32, string, string){
locusAllele1 := userLocusValue.Base1Value
locusAllele2 := userLocusValue.Base2Value
if (locusAllele1 == locusAllele2){
// Locus phase is unimportant
return 1, locusAllele1, locusAllele2
}
locusIsPhased := userLocusValue.LocusIsPhased
if (randomizePhaseBool == false && locusIsPhased == true){
return 1, locusAllele1, locusAllele2
}
// We randomize the phase of the locus
// We always do this if the locus is not phased, because the genome data might actually be partially/fully phased,
// even if it is not advertised as being phased
randomNumber := pseudorandomNumberGenerator.IntN(2)
if (randomNumber == 1){
// This has a 50% chance of being true.
return 0.5, locusAllele1, locusAllele2
}
return 0.5, locusAllele2, locusAllele1
}
locusIsKnownAndPhasedNeuronValue, locusAllele1, locusAllele2 := getFirstNeuronAndLocusAlleles()
locusAllele1NeuronValue, err := convertAlleleToNeuron(locusAllele1)
if (err != nil){ return false, nil, err }
locusAllele2NeuronValue, err := convertAlleleToNeuron(locusAllele2)
if (err != nil) { return false, nil, err }
inputLayer = append(inputLayer, locusIsKnownAndPhasedNeuronValue, locusAllele1NeuronValue, locusAllele2NeuronValue)
}
if (anyLocusExists == false){
// We have 0 known loci for this training example.
// We won't add it to the training data.
continue
}
userAttributeValueNeuronsCopy := slices.Clone(userAttributeValueNeurons)
newTrainingData := TrainingData{
InputLayer: inputLayer,
OutputLayer: userAttributeValueNeuronsCopy,
}
trainingDataList = append(trainingDataList, newTrainingData)
}
return true, trainingDataList, nil
}
func GetNewUntrainedNeuralNetworkObject(attributeName string)(*NeuralNetwork, error){
layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, err := getNeuralNetworkLayerSizes(attributeName)
if (err != nil) { return nil, err }
// This is the graph object we add each layer to
newGraph := gorgonia.NewGraph()
// We want the initial weights to be the same for each call of this function that has the same input parameters
// This is a necessary step so our neural network models will be reproducable
// Reproducable means that other people can run the code and produce the same models, byte-for-byte
pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))
// A layer is a column of neurons
// Each neuron has an initial value between 0 and 1
getNewNeuralNetworkLayerWeights := func(layerName string, layerNeuronRows int, layerNeuronColumns int)*gorgonia.Node{
layerNameObject := gorgonia.WithName(layerName)
totalNumberOfNeurons := layerNeuronRows * layerNeuronColumns
layerInitialWeightsList := make([]float32, 0, totalNumberOfNeurons)
for i:=0; i < totalNumberOfNeurons; i++{
// We initialize the weights with He initialization
// He initialization = (0 +/- sqrt(2/n) where n is the number of nodes in the prior layer)
// pseudorandomNumberGenerator.Float32() returns a pseudo-random number between 0 and 1
newWeight := ((pseudorandomNumberGenerator.Float32()-0.5)*2) * float32(math.Sqrt(float64(2)/float64(layerNeuronRows)))
layerInitialWeightsList = append(layerInitialWeightsList, newWeight)
}
layerBacking := tensor.WithBacking(layerInitialWeightsList)
layerShape := tensor.WithShape(layerNeuronRows, layerNeuronColumns)
layerTensor := tensor.New(layerBacking, layerShape)
layerValueObject := gorgonia.WithValue(layerTensor)
layerObject := gorgonia.NewMatrix(newGraph, tensor.Float32, layerNameObject, layerValueObject)
return layerObject
}
layer1 := getNewNeuralNetworkLayerWeights("Weights1", layer1NeuronCount, layer2NeuronCount)
layer2 := getNewNeuralNetworkLayerWeights("Weights2", layer2NeuronCount, layer3NeuronCount)
layer3 := getNewNeuralNetworkLayerWeights("Weights3", layer3NeuronCount, layer4NeuronCount)
newNeuralNetworkObject := NeuralNetwork{
graph: newGraph,
weights1: layer1,
weights2: layer2,
weights3: layer3,
}
return &newNeuralNetworkObject, nil
}
// This function returns the weights of the neural network
// We need this for training
func (inputNetwork *NeuralNetwork)getLearnables()gorgonia.Nodes{
weights1 := inputNetwork.weights1
weights2 := inputNetwork.weights2
weights3 := inputNetwork.weights3
result := gorgonia.Nodes{weights1, weights2, weights3}
return result
}
// This function will train the neural network
// The function is passed a batch of TrainingData examples to train on
// Inputs:
// -string: Attribute Name
// -bool: Attribute is Numeric
// -An example of a numeric attribute is Height
// -An example of a discrete attribute is Eye Color, which has discrete outcomes (colors)
// -*NeuralNetwork
// -func()(bool, bool, TrainingData, error): Function to get the next training data.
// -Outputs:
// -bool: User stopped the training run
// -bool: Another training data exists
// -TrainingData: The next training data example
// -error
// Outputs:
// -bool: Process completed (was not stopped mid-way)
// -error
func TrainNeuralNetwork(attributeName string, attributeIsNumeric bool, neuralNetworkObject *NeuralNetwork, getNextTrainingData func()(bool, bool, TrainingData, error))(bool, error){
layer1NeuronCount, _, _, layer4NeuronCount, err := getNeuralNetworkLayerSizes(attributeName)
if (err != nil) { return false, err }
neuralNetworkGraph := neuralNetworkObject.graph
// We first create the input and output nodes
// They don't have any values yet.
trainingDataInputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("Input"),
gorgonia.WithShape(1, layer1NeuronCount),
)
trainingDataExpectedOutputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("ExpectedOutput"),
gorgonia.WithShape(1, layer4NeuronCount),
)
err = neuralNetworkObject.buildNeuralNetwork(trainingDataInputNode, attributeIsNumeric)
if (err != nil) { return false, err }
// This computes the loss (how accurate was our prediction)
losses, err := gorgonia.Sub(trainingDataExpectedOutputNode, neuralNetworkObject.prediction)
if (err != nil) { return false, err }
squareOfLosses, err := gorgonia.Square(losses)
if (err != nil) { return false, err }
// Cost is an average of the square of losses
cost, err := gorgonia.Mean(squareOfLosses)
if (err != nil) { return false, err }
neuralNetworkLearnables := neuralNetworkObject.getLearnables()
// Grad takes a scalar cost node and a list of with-regards-to, and returns the gradient
_, err = gorgonia.Grad(cost, neuralNetworkLearnables...)
if (err != nil) { return false, err }
bindDualValues := gorgonia.BindDualValues(neuralNetworkLearnables...)
// NewTapeMachine creates a Virtual Machine that compiles a graph into a prog.
virtualMachine := gorgonia.NewTapeMachine(neuralNetworkGraph, bindDualValues)
// This is the learn rate or step size for the solver.
learningRate := gorgonia.WithLearnRate(.01)
// This clips the gradient if it gets too crazy
// gradientClip := gorgonia.WithClip(.05)
solver := gorgonia.NewVanillaSolver(learningRate)
// solver := gorgonia.NewVanillaSolver(learningRate, gradientClip)
defer virtualMachine.Close()
for {
userStoppedTraining, nextDataExists, trainingDataObject, err := getNextTrainingData()
if (err != nil) { return false, err }
if (userStoppedTraining == true){
// User manually stopped the training run
return false, nil
}
if (nextDataExists == false){
// We are done training
break
}
// We convert our input training data slices to the type *Dense and assign them to our nodes
// This inputLayer contains the allele values for this training example
trainingDataInputLayer := trainingDataObject.InputLayer
// This outputLayer contains the phenotype for this training example (example: Eye color of Blue)
trainingDataOutputLayer := trainingDataObject.OutputLayer
inputTensorShapeObject := tensor.WithShape(1, layer1NeuronCount)
outputTensorShapeObject := tensor.WithShape(1, layer4NeuronCount)
inputTensorBacking := tensor.WithBacking(trainingDataInputLayer)
outputTensorBacking := tensor.WithBacking(trainingDataOutputLayer)
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
outputTensor := tensor.New(outputTensorShapeObject, outputTensorBacking)
err = gorgonia.Let(trainingDataInputNode, inputTensor)
if (err != nil) { return false, err }
err = gorgonia.Let(trainingDataExpectedOutputNode, outputTensor)
if (err != nil) { return false, err }
err = virtualMachine.RunAll()
if (err != nil) { return false, err }
// NodesToValueGrads is a utility function that converts a Nodes to a slice of ValueGrad for the solver
valueGrads := gorgonia.NodesToValueGrads(neuralNetworkLearnables)
err = solver.Step(valueGrads)
if (err != nil) { return false, err }
virtualMachine.Reset()
}
return true, nil
}
// This function computes a raw prediction from the neural network
// Outputs:
// -[]float32: Output neurons
// -error
func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, attributeIsNumeric bool, inputLayer []float32)([]float32, error){
neuralNetworkGraph := inputNeuralNetwork.graph
numberOfInputNeurons := len(inputLayer)
inputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("Input"),
gorgonia.WithShape(1, numberOfInputNeurons),
)
// We convert the inputLayer []float32 to a tensor *Dense object
inputTensorShapeObject := tensor.WithShape(1, numberOfInputNeurons)
inputTensorBacking := tensor.WithBacking(inputLayer)
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
err := gorgonia.Let(inputNode, inputTensor)
if (err != nil) { return nil, err }
err = inputNeuralNetwork.buildNeuralNetwork(inputNode, attributeIsNumeric)
if (err != nil){ return nil, err }
// Now we create a virtual machine to compute the prediction
neuralNetworkLearnables := inputNeuralNetwork.getLearnables()
bindDualValues := gorgonia.BindDualValues(neuralNetworkLearnables...)
virtualMachine := gorgonia.NewTapeMachine(neuralNetworkGraph, bindDualValues)
err = virtualMachine.RunAll()
if (err != nil) { return nil, err }
prediction := inputNeuralNetwork.prediction
predictionValues := prediction.Value().Data().([]float32)
return predictionValues, nil
}
// This function will take a neural network and input layer and build the network to be able to compute a prediction
// We need to run a virtual machine after calling this function in order for the prediction to be generated
func (inputNetwork *NeuralNetwork)buildNeuralNetwork(inputLayer *gorgonia.Node, predictionIsNumeric bool)error{
// We copy node pointer (says to do this in a resource i'm reading)
inputLayerCopy := inputLayer
// We multiply weights at each layer and perform ReLU (Rectification) after each multiplication
weights1 := inputNetwork.weights1
layer1Product, err := gorgonia.Mul(inputLayerCopy, weights1)
if (err != nil) {
return errors.New("Layer 1 multiplication failed: " + err.Error())
}
layer1ProductRectified, err := gorgonia.Rectify(layer1Product)
if (err != nil){
return errors.New("Layer 1 Rectify failed: " + err.Error())
}
weights2 := inputNetwork.weights2
layer2Product, err := gorgonia.Mul(layer1ProductRectified, weights2)
if (err != nil) {
return errors.New("Layer 2 multiplication failed: " + err.Error())
}
layer2ProductRectified, err := gorgonia.Rectify(layer2Product)
if (err != nil){
return errors.New("Layer 2 Rectify failed: " + err.Error())
}
weights3 := inputNetwork.weights3
layer3Product, err := gorgonia.Mul(layer2ProductRectified, weights3)
if (err != nil) {
return errors.New("Layer 3 multiplication failed: " + err.Error())
}
if (predictionIsNumeric == false){
// We SoftMax the output to get the prediction
prediction, err := gorgonia.SoftMax(layer3Product)
if (err != nil) {
return errors.New("SoftMax failed: " + err.Error())
}
inputNetwork.prediction = prediction
} else {
// We Sigmoid the output to get the prediction
prediction, err := gorgonia.Sigmoid(layer3Product)
if (err != nil) {
return errors.New("Sigmoid failed: " + err.Error())
}
inputNetwork.prediction = prediction
}
return nil
}