836 lines
25 KiB
Go
836 lines
25 KiB
Go
|
|
// geneticPrediction provides functions to train and query neural network models
|
|
// These models are currently used to predict traits such as eye color from user genome files
|
|
|
|
package geneticPrediction
|
|
|
|
// I am a neophyte in the ways of neural networks.
|
|
// Machine learning experts should chime in and offer improvements.
|
|
// We have to make sure that model inference remains very fast
|
|
// Sorting matches by offspring total polygenic disease score will require inference on dozens of models for each match
|
|
// We could create slower models that provide more accurate predictions
|
|
|
|
import "seekia/resources/geneticReferences/traits"
|
|
|
|
import "seekia/internal/genetics/locusValue"
|
|
import "seekia/internal/genetics/readBiobankData"
|
|
import "seekia/internal/helpers"
|
|
|
|
import "gorgonia.org/gorgonia"
|
|
import "gorgonia.org/tensor"
|
|
|
|
import mathRand "math/rand"
|
|
import "bytes"
|
|
import "encoding/gob"
|
|
import "slices"
|
|
import "errors"
|
|
|
|
|
|
type NeuralNetwork struct{
|
|
|
|
// ExprGraph is a data structure for a directed acyclic graph (of expressions).
|
|
graph *gorgonia.ExprGraph
|
|
|
|
// These are the weights for each layer of neurons
|
|
weights1 *gorgonia.Node
|
|
weights2 *gorgonia.Node
|
|
weights3 *gorgonia.Node
|
|
weights4 *gorgonia.Node
|
|
|
|
// This is the computed prediction
|
|
prediction *gorgonia.Node
|
|
}
|
|
|
|
// TrainingData is a single training example built from one user's data.
//
// InputLayer is a column of neurons representing the user's genetics, and
// OutputLayer is a column of neurons representing their phenotype, such as
// eye color.
type TrainingData struct {

	// InputLayer holds the relevant rsID values for the trait, taken from the
	// user's genomes. It also records whether each rsID exists and whether it
	// is phased.
	InputLayer []float32

	// OutputLayer holds the user's phenotype as neurons, one neuron per
	// outcome. For Eye Color, each neuron represents an eye color.
	OutputLayer []float32
}
|
|
|
|
|
|
func EncodeTrainingDataObjectToBytes(inputTrainingData TrainingData)([]byte, error){
|
|
|
|
buffer := new(bytes.Buffer)
|
|
|
|
encoder := gob.NewEncoder(buffer)
|
|
|
|
err := encoder.Encode(inputTrainingData)
|
|
if (err != nil) { return nil, err }
|
|
|
|
trainingDataBytes := buffer.Bytes()
|
|
|
|
return trainingDataBytes, nil
|
|
}
|
|
|
|
func DecodeBytesToTrainingDataObject(inputTrainingData []byte)(TrainingData, error){
|
|
|
|
if (inputTrainingData == nil){
|
|
return TrainingData{}, errors.New("DecodeBytesToTrainingDataObject called with nil inputTrainingData.")
|
|
}
|
|
|
|
buffer := bytes.NewBuffer(inputTrainingData)
|
|
|
|
decoder := gob.NewDecoder(buffer)
|
|
|
|
var newTrainingData TrainingData
|
|
|
|
err := decoder.Decode(&newTrainingData)
|
|
if (err != nil){ return TrainingData{}, err }
|
|
|
|
return newTrainingData, nil
|
|
}
|
|
|
|
// neuralNetworkForEncoding is the gob-friendly form of a neural network.
// We use it to store a neural network's weights as a .gob file.
type neuralNetworkForEncoding struct {

	// The flattened weights for each layer of neurons.
	Weights1, Weights2, Weights3, Weights4 []float32

	// The row and column counts of each weight matrix.
	Weights1Rows, Weights1Columns int
	Weights2Rows, Weights2Columns int
	Weights3Rows, Weights3Columns int
	Weights4Rows, Weights4Columns int
}
|
|
|
|
func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, error){
|
|
|
|
weights1 := inputNeuralNetwork.weights1
|
|
weights2 := inputNeuralNetwork.weights2
|
|
weights3 := inputNeuralNetwork.weights3
|
|
weights4 := inputNeuralNetwork.weights4
|
|
|
|
weights1Slice := weights1.Value().Data().([]float32)
|
|
weights2Slice := weights2.Value().Data().([]float32)
|
|
weights3Slice := weights3.Value().Data().([]float32)
|
|
weights4Slice := weights4.Value().Data().([]float32)
|
|
|
|
weights1Rows := weights1.Shape()[0]
|
|
weights1Columns := weights1.Shape()[1]
|
|
weights2Rows := weights2.Shape()[0]
|
|
weights2Columns := weights2.Shape()[1]
|
|
weights3Rows := weights3.Shape()[0]
|
|
weights3Columns := weights3.Shape()[1]
|
|
weights4Rows := weights4.Shape()[0]
|
|
weights4Columns := weights4.Shape()[1]
|
|
|
|
newNeuralNetworkForEncoding := neuralNetworkForEncoding{
|
|
Weights1: weights1Slice,
|
|
Weights2: weights2Slice,
|
|
Weights3: weights3Slice,
|
|
Weights4: weights4Slice,
|
|
|
|
Weights1Rows: weights1Rows,
|
|
Weights1Columns: weights1Columns,
|
|
Weights2Rows: weights2Rows,
|
|
Weights2Columns: weights2Columns,
|
|
Weights3Rows: weights3Rows,
|
|
Weights3Columns: weights3Columns,
|
|
Weights4Rows: weights4Rows,
|
|
Weights4Columns: weights4Columns,
|
|
}
|
|
|
|
buffer := new(bytes.Buffer)
|
|
|
|
encoder := gob.NewEncoder(buffer)
|
|
|
|
err := encoder.Encode(newNeuralNetworkForEncoding)
|
|
if (err != nil) { return nil, err }
|
|
|
|
neuralNetworkBytes := buffer.Bytes()
|
|
|
|
return neuralNetworkBytes, nil
|
|
}
|
|
|
|
func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork, error){
|
|
|
|
if (inputNeuralNetwork == nil){
|
|
return NeuralNetwork{}, errors.New("DecodeBytesToNeuralNetworkObject called with nil inputNeuralNetwork.")
|
|
}
|
|
|
|
buffer := bytes.NewBuffer(inputNeuralNetwork)
|
|
|
|
decoder := gob.NewDecoder(buffer)
|
|
|
|
var newNeuralNetworkForEncoding neuralNetworkForEncoding
|
|
|
|
err := decoder.Decode(&newNeuralNetworkForEncoding)
|
|
if (err != nil){ return NeuralNetwork{}, err }
|
|
|
|
weights1 := newNeuralNetworkForEncoding.Weights1
|
|
weights2 := newNeuralNetworkForEncoding.Weights2
|
|
weights3 := newNeuralNetworkForEncoding.Weights3
|
|
weights4 := newNeuralNetworkForEncoding.Weights4
|
|
|
|
weights1Rows := newNeuralNetworkForEncoding.Weights1Rows
|
|
weights1Columns := newNeuralNetworkForEncoding.Weights1Columns
|
|
weights2Rows := newNeuralNetworkForEncoding.Weights2Rows
|
|
weights2Columns := newNeuralNetworkForEncoding.Weights2Columns
|
|
weights3Rows := newNeuralNetworkForEncoding.Weights3Rows
|
|
weights3Columns := newNeuralNetworkForEncoding.Weights3Columns
|
|
weights4Rows := newNeuralNetworkForEncoding.Weights4Rows
|
|
weights4Columns := newNeuralNetworkForEncoding.Weights4Columns
|
|
|
|
// This is the graph object we add each layer to
|
|
newGraph := gorgonia.NewGraph()
|
|
|
|
// A layer is a column of neurons
|
|
// Each neuron has an initial value between 0 and 1
|
|
getNewNeuralNetworkLayerWeights := func(layerName string, layerNeuronRows int, layerNeuronColumns int, layerWeightsList []float32)*gorgonia.Node{
|
|
|
|
layerNameObject := gorgonia.WithName(layerName)
|
|
|
|
layerBacking := tensor.WithBacking(layerWeightsList)
|
|
layerShape := tensor.WithShape(layerNeuronRows, layerNeuronColumns)
|
|
layerTensor := tensor.New(layerBacking, layerShape)
|
|
|
|
layerValueObject := gorgonia.WithValue(layerTensor)
|
|
|
|
layerObject := gorgonia.NewMatrix(newGraph, tensor.Float32, layerNameObject, layerValueObject)
|
|
|
|
return layerObject
|
|
}
|
|
|
|
layer1 := getNewNeuralNetworkLayerWeights("Weights1", weights1Rows, weights1Columns, weights1)
|
|
layer2 := getNewNeuralNetworkLayerWeights("Weights2", weights2Rows, weights2Columns, weights2)
|
|
layer3 := getNewNeuralNetworkLayerWeights("Weights3", weights3Rows, weights3Columns, weights3)
|
|
layer4 := getNewNeuralNetworkLayerWeights("Weights4", weights4Rows, weights4Columns, weights4)
|
|
|
|
newNeuralNetworkObject := NeuralNetwork{
|
|
|
|
graph: newGraph,
|
|
|
|
weights1: layer1,
|
|
weights2: layer2,
|
|
weights3: layer3,
|
|
weights4: layer4,
|
|
}
|
|
|
|
return newNeuralNetworkObject, nil
|
|
}
|
|
|
|
|
|
// getNeuralNetworkLayerSizes returns the number of neurons in each layer of
// the neural network used for the provided trait.
// Outputs:
//	-int: Layer 1 neuron count (input layer)
//	-int: Layer 2 neuron count
//	-int: Layer 3 neuron count
//	-int: Layer 4 neuron count
//	-int: Layer 5 neuron count (output layer)
//	-error: Non-nil if traitName is unknown
func getNeuralNetworkLayerSizes(traitName string) (int, int, int, int, int, error) {

	if traitName != "Eye Color" {
		return 0, 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown traitName: " + traitName)
	}

	// Eye Color:
	// There are 376 input neurons
	// There are 4 output neurons, one per color: Blue, Green, Brown, Hazel
	const (
		inputNeuronCount   = 376
		hidden1NeuronCount = 200
		hidden2NeuronCount = 100
		hidden3NeuronCount = 50
		outputNeuronCount  = 4
	)

	return inputNeuronCount, hidden1NeuronCount, hidden2NeuronCount, hidden3NeuronCount, outputNeuronCount, nil
}
|
|
|
|
// convertAlleleToNeuron converts a genome allele to the neuron value used to
// represent it in a tensor.
//
// The six recognized alleles (C, A, T, G, I, D) map to evenly spaced values
// from 0 to 1. Any other allele yields an error.
func convertAlleleToNeuron(allele string) (float32, error) {

	alleleNeurons := [...]struct {
		allele string
		neuron float32
	}{
		{"C", 0},
		{"A", 0.2},
		{"T", 0.4},
		{"G", 0.6},
		{"I", 0.8},
		{"D", 1},
	}

	for _, entry := range alleleNeurons {
		if entry.allele == allele {
			return entry.neuron, nil
		}
	}

	return 0, errors.New("convertAlleleToNeuron called with invalid allele: " + allele)
}
|
|
|
|
|
|
// This function returns training data to use to train each neural network prediction model
|
|
// Outputs:
|
|
// -bool: User has phenotype data and enough loci to train model
|
|
// -[]TrainingData: List of TrainingData for the user which we will use to train the model
|
|
// -error
|
|
func CreateGeneticPredictionTrainingData_OpenSNP(
|
|
traitName string,
|
|
userPhenotypeDataObject readBiobankData.PhenotypeData_OpenSNP,
|
|
userLocusValuesMap map[int64]locusValue.LocusValue)(bool, []TrainingData, error){
|
|
|
|
if (traitName != "Eye Color"){
|
|
return false, nil, errors.New("CreateGeneticPredictionTrainingData_OpenSNP called with unknown traitName: " + traitName)
|
|
}
|
|
|
|
traitObject, err := traits.GetTraitObject(traitName)
|
|
if (err != nil) { return false, nil, err }
|
|
|
|
// This is a list of rsIDs which influence this trait
|
|
traitRSIDs := traitObject.LociList
|
|
|
|
if (len(traitRSIDs) == 0){
|
|
return false, nil, errors.New("traitObject contains no rsIDs.")
|
|
}
|
|
|
|
// Each layer is represented as a []float32
|
|
// Each float is a value between 0 and 1
|
|
//
|
|
// Each TrainingData holds a variation of the user's genome rsID values
|
|
// We add many rows with withheld data to improve training data
|
|
|
|
numberOfInputLayerRows, _, _, _, numberOfOutputLayerRows, err := getNeuralNetworkLayerSizes(traitName)
|
|
if (err != nil) { return false, nil, err }
|
|
|
|
// Each rsID is represented by 4 neurons: LocusExists, LocusIsPhased, Allele1 Value, Allele2 Value
|
|
expectedNumberOfInputLayerRows := len(traitRSIDs) * 4
|
|
|
|
if (numberOfInputLayerRows != expectedNumberOfInputLayerRows){
|
|
|
|
expectedNumberOfInputLayerRowsString := helpers.ConvertIntToString(expectedNumberOfInputLayerRows)
|
|
|
|
return false, nil, errors.New("numberOfInputLayerRows is not expected: " + expectedNumberOfInputLayerRowsString)
|
|
}
|
|
|
|
checkIfAnyTraitLocusValuesExist := func()bool{
|
|
|
|
for _, rsID := range traitRSIDs{
|
|
|
|
_, exists := userLocusValuesMap[rsID]
|
|
if (exists == true){
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
anyTraitLocusValuesExist := checkIfAnyTraitLocusValuesExist()
|
|
if (anyTraitLocusValuesExist == false){
|
|
// The user's genome does not contain any of this trait's locus values
|
|
// We will not train on their data
|
|
return false, nil, nil
|
|
}
|
|
|
|
// We sort rsIDs in ascending order
|
|
// We copy list so we don't change the original
|
|
|
|
traitRSIDsList := slices.Clone(traitRSIDs)
|
|
|
|
slices.Sort(traitRSIDsList)
|
|
|
|
// This function returns the outputLayer for all trainingDatas for this user
|
|
// Each outputLayer represents the user's trait value (Example: "Blue" for Eye Color)
|
|
// Each outputLayer is identical, because each TrainingData example belongs to the same user
|
|
//
|
|
// Outputs:
|
|
// -bool: User trait value is known
|
|
// -[]float32: Neuron values for layer
|
|
// -error
|
|
getUserTraitValueNeurons := func()(bool, []float32, error){
|
|
|
|
if (traitName == "Eye Color"){
|
|
|
|
userEyeColorIsKnown := userPhenotypeDataObject.EyeColorIsKnown
|
|
if (userEyeColorIsKnown == false){
|
|
return false, nil, nil
|
|
}
|
|
|
|
userEyeColor := userPhenotypeDataObject.EyeColor
|
|
|
|
if (userEyeColor == "Blue"){
|
|
|
|
return true, []float32{1, 0, 0, 0}, nil
|
|
|
|
} else if (userEyeColor == "Green"){
|
|
|
|
return true, []float32{0, 1, 0, 0}, nil
|
|
|
|
} else if (userEyeColor == "Hazel"){
|
|
|
|
return true, []float32{0, 0, 1, 0}, nil
|
|
|
|
} else if (userEyeColor == "Brown"){
|
|
|
|
return true, []float32{0, 0, 0, 1}, nil
|
|
}
|
|
|
|
return false, nil, errors.New("Malformed userPhenotypeDataObject: Invalid eyeColor: " + userEyeColor)
|
|
}
|
|
|
|
return false, nil, errors.New("Unknown traitName: " + traitName)
|
|
}
|
|
|
|
userTraitValueExists, userTraitValueNeurons, err := getUserTraitValueNeurons()
|
|
if (err != nil) { return false, nil, err }
|
|
if (userTraitValueExists == false){
|
|
// User cannot be used to train the model.
|
|
// They do not have a value for this trait.
|
|
return false, nil, nil
|
|
}
|
|
|
|
if (len(userTraitValueNeurons) != numberOfOutputLayerRows){
|
|
return false, nil, errors.New("getUserTraitValueNeurons returning invalid length layer slice.")
|
|
}
|
|
|
|
// We create 110 examples per user.
|
|
// We randomize allele order whenever phase for the locus is unknown
|
|
// 50% of the time we randomize allele order even when phase is known to train the model on unphased data
|
|
// Unphased data is data where the order each allele (Example: G;A) has no meaning, because phase data was not captured
|
|
// We randomize allele order to simulate unphased data
|
|
// For example, if a user's genome is phased, we will randomize the base pair order and set the LocusIsPhased neuron to false
|
|
//
|
|
// Examples 0-10: 100% of the user's loci are used
|
|
// Examples 11-30: 90% of the user's loci are used
|
|
// Examples 31-50: 70% of the user's loci are used
|
|
// Examples 51-70: 50% of the user's loci are used
|
|
// Examples 71-90: 30% of the user's loci are used
|
|
// Examples 91-110: 10% of the user's loci are used
|
|
|
|
// We now add this user's data to the trainingDataList
|
|
|
|
trainingDataList := make([]TrainingData, 0, 110)
|
|
|
|
for i:=0; i < 110; i++{
|
|
|
|
getRandomizePhaseBool := func()bool{
|
|
|
|
if (i%2 == 0){
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
randomizePhaseBool := getRandomizePhaseBool()
|
|
|
|
getProbabilityOfUsingLoci := func()float64{
|
|
|
|
if (i <= 10){
|
|
return 1
|
|
|
|
} else if (i <= 30){
|
|
return 0.9
|
|
|
|
} else if (i <= 50){
|
|
return 0.7
|
|
|
|
} else if (i <= 70){
|
|
return 0.5
|
|
|
|
} else if (i <= 90){
|
|
return 0.3
|
|
}
|
|
|
|
return 0.1
|
|
}
|
|
|
|
probabilityOfUsingLoci := getProbabilityOfUsingLoci()
|
|
|
|
// In the inputLayer, each locus value is represented by 4 neurons:
|
|
// 1. LocusExists (Either 0 or 1)
|
|
// 2. LocusIsPhased (Either 0 or 1)
|
|
// 3. Allele1 Locus Value (Value between 0-1)
|
|
// 4. Allele2 Locus Value (Value between 0-1)
|
|
|
|
inputLayerLength := len(traitRSIDsList) * 4
|
|
|
|
inputLayer := make([]float32, 0, inputLayerLength)
|
|
|
|
for _, rsID := range traitRSIDsList{
|
|
|
|
useLocusBool, err := helpers.GetRandomBoolWithProbability(probabilityOfUsingLoci)
|
|
if (err != nil) { return false, nil, err }
|
|
if (useLocusBool == false){
|
|
// We are skipping this locus
|
|
inputLayer = append(inputLayer, 0, 0, 0, 0)
|
|
continue
|
|
}
|
|
|
|
userLocusValue, exists := userLocusValuesMap[rsID]
|
|
if (exists == false){
|
|
// This user's locus value is unknown
|
|
inputLayer = append(inputLayer, 0, 0, 0, 0)
|
|
continue
|
|
}
|
|
|
|
getLocusAlleles := func()(string, string){
|
|
|
|
locusAllele1 := userLocusValue.Base1Value
|
|
locusAllele2 := userLocusValue.Base2Value
|
|
|
|
if (randomizePhaseBool == false){
|
|
return locusAllele1, locusAllele2
|
|
}
|
|
|
|
randomBool := helpers.GetRandomBool()
|
|
|
|
if (randomBool == false){
|
|
return locusAllele1, locusAllele2
|
|
}
|
|
|
|
return locusAllele2, locusAllele1
|
|
}
|
|
|
|
locusAllele1, locusAllele2 := getLocusAlleles()
|
|
|
|
locusAllele1NeuronValue, err := convertAlleleToNeuron(locusAllele1)
|
|
if (err != nil){ return false, nil, err }
|
|
locusAllele2NeuronValue, err := convertAlleleToNeuron(locusAllele2)
|
|
if (err != nil) { return false, nil, err }
|
|
|
|
getLocusIsPhasedNeuronValue := func()float32{
|
|
|
|
if (randomizePhaseBool == true){
|
|
return 0
|
|
}
|
|
|
|
locusIsPhased := userLocusValue.LocusIsPhased
|
|
if (locusIsPhased == true){
|
|
return 1
|
|
}
|
|
|
|
return 0
|
|
}
|
|
|
|
locusIsPhasedNeuronValue := getLocusIsPhasedNeuronValue()
|
|
|
|
inputLayer = append(inputLayer, 1, locusIsPhasedNeuronValue, locusAllele1NeuronValue, locusAllele2NeuronValue)
|
|
}
|
|
|
|
userTraitValueNeuronsCopy := slices.Clone(userTraitValueNeurons)
|
|
|
|
newTrainingData := TrainingData{
|
|
InputLayer: inputLayer,
|
|
OutputLayer: userTraitValueNeuronsCopy,
|
|
}
|
|
|
|
trainingDataList = append(trainingDataList, newTrainingData)
|
|
}
|
|
|
|
return true, trainingDataList, nil
|
|
}
|
|
|
|
func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error){
|
|
|
|
layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, layer5NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
|
|
if (err != nil) { return nil, err }
|
|
|
|
// This is the graph object we add each layer to
|
|
newGraph := gorgonia.NewGraph()
|
|
|
|
// We want the initial weights to be the same for each call of this function that has the same input parameters
|
|
// This is a necessary step so our neural network models will be reproducable
|
|
// Reproducable means that other people can run the code and produce the same models, byte-for-byte
|
|
|
|
pseudorandomNumberGenerator := mathRand.New(mathRand.NewSource(1))
|
|
|
|
// A layer is a column of neurons
|
|
// Each neuron has an initial value between 0 and 1
|
|
getNewNeuralNetworkLayerWeights := func(layerName string, layerNeuronRows int, layerNeuronColumns int)*gorgonia.Node{
|
|
|
|
layerNameObject := gorgonia.WithName(layerName)
|
|
|
|
totalNumberOfNeurons := layerNeuronRows * layerNeuronColumns
|
|
|
|
layerInitialWeightsList := make([]float32, 0, totalNumberOfNeurons)
|
|
|
|
for i:=0; i < totalNumberOfNeurons; i++{
|
|
|
|
// This returns a pseudo-random number between 0 and 1
|
|
newWeight := pseudorandomNumberGenerator.Float32()
|
|
|
|
layerInitialWeightsList = append(layerInitialWeightsList, newWeight)
|
|
}
|
|
|
|
layerBacking := tensor.WithBacking(layerInitialWeightsList)
|
|
|
|
layerShape := tensor.WithShape(layerNeuronRows, layerNeuronColumns)
|
|
|
|
layerTensor := tensor.New(layerBacking, layerShape)
|
|
|
|
layerValueObject := gorgonia.WithValue(layerTensor)
|
|
|
|
layerObject := gorgonia.NewMatrix(newGraph, tensor.Float32, layerNameObject, layerValueObject)
|
|
|
|
return layerObject
|
|
}
|
|
|
|
layer1 := getNewNeuralNetworkLayerWeights("Weights1", layer1NeuronCount, layer2NeuronCount)
|
|
layer2 := getNewNeuralNetworkLayerWeights("Weights2", layer2NeuronCount, layer3NeuronCount)
|
|
layer3 := getNewNeuralNetworkLayerWeights("Weights3", layer3NeuronCount, layer4NeuronCount)
|
|
layer4 := getNewNeuralNetworkLayerWeights("Weights4", layer4NeuronCount, layer5NeuronCount)
|
|
|
|
newNeuralNetworkObject := NeuralNetwork{
|
|
|
|
graph: newGraph,
|
|
|
|
weights1: layer1,
|
|
weights2: layer2,
|
|
weights3: layer3,
|
|
weights4: layer4,
|
|
}
|
|
|
|
return &newNeuralNetworkObject, nil
|
|
}
|
|
|
|
// This function returns the weights of the neural network
|
|
// We need this for training
|
|
func (inputNetwork *NeuralNetwork)getLearnables()gorgonia.Nodes{
|
|
|
|
weights1 := inputNetwork.weights1
|
|
weights2 := inputNetwork.weights2
|
|
weights3 := inputNetwork.weights3
|
|
weights4 := inputNetwork.weights4
|
|
|
|
result := gorgonia.Nodes{weights1, weights2, weights3, weights4}
|
|
|
|
return result
|
|
}
|
|
|
|
|
|
// This function will train the neural network
|
|
// The function is passed a single TrainingData example to train on
|
|
//
|
|
// TODO: This function doesn't work
|
|
// The weights do not change during training
|
|
// I think the layer dimensions are wrong?
|
|
//
|
|
func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, trainingData TrainingData)error{
|
|
|
|
layer1NeuronCount, _, _, _, layer5NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
|
|
if (err != nil) { return err }
|
|
|
|
neuralNetworkGraph := neuralNetworkObject.graph
|
|
|
|
// This inputLayer contains the allele values for this training example
|
|
trainingDataInputLayer := trainingData.InputLayer
|
|
|
|
// This outputLayer contains the phenotype for this training example (example: Eye color of Blue)
|
|
trainingDataOutputLayer := trainingData.OutputLayer
|
|
|
|
// We convert our inputTensor and outputTensor to the type *Node
|
|
|
|
inputTensorShapeObject := tensor.WithShape(1, layer1NeuronCount)
|
|
outputTensorShapeObject := tensor.WithShape(1, layer5NeuronCount)
|
|
|
|
inputTensorBacking := tensor.WithBacking(trainingDataInputLayer)
|
|
outputTensorBacking := tensor.WithBacking(trainingDataOutputLayer)
|
|
|
|
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
|
|
outputTensor := tensor.New(outputTensorShapeObject, outputTensorBacking)
|
|
|
|
trainingDataInputNode := gorgonia.NewMatrix(neuralNetworkGraph,
|
|
tensor.Float32,
|
|
gorgonia.WithName("input"),
|
|
gorgonia.WithShape(1, layer1NeuronCount),
|
|
gorgonia.WithValue(inputTensor),
|
|
)
|
|
|
|
trainingDataOutputNode := gorgonia.NewMatrix(neuralNetworkGraph,
|
|
tensor.Float32,
|
|
gorgonia.WithName("expectedOutput"),
|
|
gorgonia.WithShape(1, layer5NeuronCount),
|
|
gorgonia.WithValue(outputTensor),
|
|
)
|
|
|
|
err = neuralNetworkObject.prepareToComputePrediction(trainingDataInputNode)
|
|
if (err != nil) { return err }
|
|
|
|
// This computes the loss (how accurate was our prediction)
|
|
losses, err := gorgonia.Sub(trainingDataOutputNode, neuralNetworkObject.prediction)
|
|
if (err != nil) { return err }
|
|
|
|
// Cost is an average of the losses
|
|
cost, err := gorgonia.Mean(losses)
|
|
if (err != nil) { return err }
|
|
|
|
neuralNetworkLearnables := neuralNetworkObject.getLearnables()
|
|
|
|
// Grad takes a scalar cost node and a list of with-regards-to, and returns the gradient
|
|
_, err = gorgonia.Grad(cost, neuralNetworkLearnables...)
|
|
if (err != nil) { return err }
|
|
|
|
bindDualValues := gorgonia.BindDualValues(neuralNetworkLearnables...)
|
|
|
|
// NewTapeMachine creates a Virtual Machine that compiles a graph into a prog.
|
|
virtualMachine := gorgonia.NewTapeMachine(neuralNetworkGraph, bindDualValues)
|
|
|
|
// This is the learn rate or step size for the solver.
|
|
learningRate := gorgonia.WithLearnRate(.001)
|
|
|
|
// This clips the gradient if it gets too crazy
|
|
//gradientClip := gorgonia.WithClip(5)
|
|
|
|
solver := gorgonia.NewVanillaSolver(learningRate)
|
|
//solver := gorgonia.NewVanillaSolver(learningRate, gradientClip)
|
|
|
|
for i:=0; i < 10; i++{
|
|
|
|
err = virtualMachine.RunAll()
|
|
if (err != nil) { return err }
|
|
|
|
// NodesToValueGrads is a utility function that converts a Nodes to a slice of ValueGrad for the solver
|
|
valueGrads := gorgonia.NodesToValueGrads(neuralNetworkLearnables)
|
|
|
|
err := solver.Step(valueGrads)
|
|
if (err != nil) { return err }
|
|
|
|
virtualMachine.Reset()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
|
|
// This function computes a raw prediction from the neural network
|
|
// Outputs:
|
|
// -[]float32: Output neurons
|
|
// -error
|
|
func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer []float32)([]float32, error){
|
|
|
|
neuralNetworkGraph := inputNeuralNetwork.graph
|
|
|
|
// We convert the inputLayer []float32 to a node object
|
|
|
|
numberOfInputNeurons := len(inputLayer)
|
|
|
|
inputTensorShapeObject := tensor.WithShape(1, numberOfInputNeurons)
|
|
|
|
inputTensorBacking := tensor.WithBacking(inputLayer)
|
|
|
|
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
|
|
|
|
inputNode := gorgonia.NewMatrix(neuralNetworkGraph,
|
|
tensor.Float32,
|
|
gorgonia.WithName("input"),
|
|
gorgonia.WithShape(1, numberOfInputNeurons),
|
|
gorgonia.WithValue(inputTensor),
|
|
)
|
|
|
|
err := inputNeuralNetwork.prepareToComputePrediction(inputNode)
|
|
if (err != nil){ return nil, err }
|
|
|
|
prediction := inputNeuralNetwork.prediction
|
|
|
|
// Now we create a virtual machine to compute the prediction
|
|
|
|
neuralNetworkLearnables := inputNeuralNetwork.getLearnables()
|
|
|
|
bindDualValues := gorgonia.BindDualValues(neuralNetworkLearnables...)
|
|
|
|
virtualMachine := gorgonia.NewTapeMachine(neuralNetworkGraph, bindDualValues)
|
|
|
|
err = virtualMachine.RunAll()
|
|
if (err != nil) { return nil, err }
|
|
|
|
predictionValues := prediction.Value().Data().([]float32)
|
|
|
|
return predictionValues, nil
|
|
}
|
|
|
|
|
|
// This function will take a neural network and input layer and prepare the network to compute a prediction
|
|
// We still need to run a virtual machine after calling this function in order for the prediction to be generated
|
|
func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgonia.Node)error{
|
|
|
|
// We copy pointer (says to do this in a resource i'm reading)
|
|
|
|
inputLayerCopy := inputLayer
|
|
|
|
// We multiply weights at each layer and perform rectification (ReLU) after each multiplication
|
|
|
|
weights1 := inputNetwork.weights1
|
|
weights2 := inputNetwork.weights2
|
|
weights3 := inputNetwork.weights3
|
|
weights4 := inputNetwork.weights4
|
|
|
|
layer1Product, err := gorgonia.Mul(inputLayerCopy, weights1)
|
|
if (err != nil) {
|
|
return errors.New("Layer 1 multiplication failed: " + err.Error())
|
|
}
|
|
|
|
layer1ProductRectified, err := gorgonia.Rectify(layer1Product)
|
|
if (err != nil){
|
|
return errors.New("Layer 1 rectification failed: " + err.Error())
|
|
}
|
|
|
|
layer2Product, err := gorgonia.Mul(layer1ProductRectified, weights2)
|
|
if (err != nil) {
|
|
return errors.New("Layer 2 multiplication failed: " + err.Error())
|
|
}
|
|
|
|
layer2ProductRectified, err := gorgonia.Rectify(layer2Product)
|
|
if (err != nil){
|
|
return errors.New("Layer 2 rectification failed: " + err.Error())
|
|
}
|
|
|
|
layer3Product, err := gorgonia.Mul(layer2ProductRectified, weights3)
|
|
if (err != nil) {
|
|
return errors.New("Layer 3 multiplication failed: " + err.Error())
|
|
}
|
|
|
|
layer3ProductRectified, err := gorgonia.Rectify(layer3Product)
|
|
if (err != nil){
|
|
return errors.New("Layer 3 rectification failed: " + err.Error())
|
|
}
|
|
|
|
layer4Product, err := gorgonia.Mul(layer3ProductRectified, weights4)
|
|
if (err != nil) {
|
|
return errors.New("Layer 4 multiplication failed: " + err.Error())
|
|
}
|
|
|
|
layer4ProductRectified, err := gorgonia.Rectify(layer4Product)
|
|
if (err != nil){
|
|
return errors.New("Layer 4 rectification failed: " + err.Error())
|
|
}
|
|
|
|
// We sigmoid the output to get the prediction
|
|
//TODO: Use SoftMax instead?
|
|
|
|
prediction, err := gorgonia.Sigmoid(layer4ProductRectified)
|
|
if (err != nil) {
|
|
return errors.New("Sigmoid failed: " + err.Error())
|
|
}
|
|
|
|
inputNetwork.prediction = prediction
|
|
|
|
return nil
|
|
}
|
|
|
|
|