Improved the Create Genetic Models utility and neural network training code. Models are now able to predict traits with some accuracy.
This commit is contained in:
parent
182175948f
commit
75331a22d3
7 changed files with 999 additions and 443 deletions
|
@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log.
|
|||
|
||||
## Unversioned Changes
|
||||
|
||||
* Improved the Create Genetic Models utility and neural network training code. Models are now able to predict traits with some accuracy. - *Simon Sarasova*
|
||||
* Improved ReadMe.md. - *Simon Sarasova*
|
||||
* Improved Seekia's slogan and Whitepaper.md. - *Simon Sarasova*
|
||||
* Added an Estimated Time Remaining label to 2 processes within the Create Genetic Models utility. - *Simon Sarasova*
|
||||
|
|
|
@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th
|
|||
|
||||
Name | Date Of First Commit | Number Of Commits
|
||||
--- | --- | ---
|
||||
Simon Sarasova | June 13, 2023 | 264
|
||||
Simon Sarasova | June 13, 2023 | 265
|
|
@ -20,22 +20,23 @@ import "gorgonia.org/gorgonia"
|
|||
import "gorgonia.org/tensor"
|
||||
|
||||
import mathRand "math/rand/v2"
|
||||
import "math"
|
||||
import "bytes"
|
||||
import "encoding/gob"
|
||||
import "slices"
|
||||
import "errors"
|
||||
|
||||
//import "log"
|
||||
|
||||
type NeuralNetwork struct{
|
||||
|
||||
// ExprGraph is a data structure for a directed acyclic graph (of expressions).
|
||||
graph *gorgonia.ExprGraph
|
||||
graph *gorgonia.ExprGraph
|
||||
|
||||
// These are the weights for each layer of neurons
|
||||
weights1 *gorgonia.Node
|
||||
weights2 *gorgonia.Node
|
||||
weights3 *gorgonia.Node
|
||||
weights4 *gorgonia.Node
|
||||
|
||||
// This is the computed prediction
|
||||
prediction *gorgonia.Node
|
||||
|
@ -97,7 +98,6 @@ type neuralNetworkForEncoding struct{
|
|||
Weights1 []float32
|
||||
Weights2 []float32
|
||||
Weights3 []float32
|
||||
Weights4 []float32
|
||||
|
||||
Weights1Rows int
|
||||
Weights1Columns int
|
||||
|
@ -105,8 +105,6 @@ type neuralNetworkForEncoding struct{
|
|||
Weights2Columns int
|
||||
Weights3Rows int
|
||||
Weights3Columns int
|
||||
Weights4Rows int
|
||||
Weights4Columns int
|
||||
}
|
||||
|
||||
func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, error){
|
||||
|
@ -114,12 +112,10 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte,
|
|||
weights1 := inputNeuralNetwork.weights1
|
||||
weights2 := inputNeuralNetwork.weights2
|
||||
weights3 := inputNeuralNetwork.weights3
|
||||
weights4 := inputNeuralNetwork.weights4
|
||||
|
||||
weights1Slice := weights1.Value().Data().([]float32)
|
||||
weights2Slice := weights2.Value().Data().([]float32)
|
||||
weights3Slice := weights3.Value().Data().([]float32)
|
||||
weights4Slice := weights4.Value().Data().([]float32)
|
||||
|
||||
weights1Rows := weights1.Shape()[0]
|
||||
weights1Columns := weights1.Shape()[1]
|
||||
|
@ -127,14 +123,11 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte,
|
|||
weights2Columns := weights2.Shape()[1]
|
||||
weights3Rows := weights3.Shape()[0]
|
||||
weights3Columns := weights3.Shape()[1]
|
||||
weights4Rows := weights4.Shape()[0]
|
||||
weights4Columns := weights4.Shape()[1]
|
||||
|
||||
newNeuralNetworkForEncoding := neuralNetworkForEncoding{
|
||||
Weights1: weights1Slice,
|
||||
Weights2: weights2Slice,
|
||||
Weights3: weights3Slice,
|
||||
Weights4: weights4Slice,
|
||||
|
||||
Weights1Rows: weights1Rows,
|
||||
Weights1Columns: weights1Columns,
|
||||
|
@ -142,8 +135,6 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte,
|
|||
Weights2Columns: weights2Columns,
|
||||
Weights3Rows: weights3Rows,
|
||||
Weights3Columns: weights3Columns,
|
||||
Weights4Rows: weights4Rows,
|
||||
Weights4Columns: weights4Columns,
|
||||
}
|
||||
|
||||
buffer := new(bytes.Buffer)
|
||||
|
@ -176,7 +167,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
|
|||
weights1 := newNeuralNetworkForEncoding.Weights1
|
||||
weights2 := newNeuralNetworkForEncoding.Weights2
|
||||
weights3 := newNeuralNetworkForEncoding.Weights3
|
||||
weights4 := newNeuralNetworkForEncoding.Weights4
|
||||
|
||||
weights1Rows := newNeuralNetworkForEncoding.Weights1Rows
|
||||
weights1Columns := newNeuralNetworkForEncoding.Weights1Columns
|
||||
|
@ -184,8 +174,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
|
|||
weights2Columns := newNeuralNetworkForEncoding.Weights2Columns
|
||||
weights3Rows := newNeuralNetworkForEncoding.Weights3Rows
|
||||
weights3Columns := newNeuralNetworkForEncoding.Weights3Columns
|
||||
weights4Rows := newNeuralNetworkForEncoding.Weights4Rows
|
||||
weights4Columns := newNeuralNetworkForEncoding.Weights4Columns
|
||||
|
||||
// This is the graph object we add each layer to
|
||||
newGraph := gorgonia.NewGraph()
|
||||
|
@ -210,7 +198,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
|
|||
layer1 := getNewNeuralNetworkLayerWeights("Weights1", weights1Rows, weights1Columns, weights1)
|
||||
layer2 := getNewNeuralNetworkLayerWeights("Weights2", weights2Rows, weights2Columns, weights2)
|
||||
layer3 := getNewNeuralNetworkLayerWeights("Weights3", weights3Rows, weights3Columns, weights3)
|
||||
layer4 := getNewNeuralNetworkLayerWeights("Weights4", weights4Rows, weights4Columns, weights4)
|
||||
|
||||
newNeuralNetworkObject := NeuralNetwork{
|
||||
|
||||
|
@ -219,57 +206,204 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
|
|||
weights1: layer1,
|
||||
weights2: layer2,
|
||||
weights3: layer3,
|
||||
weights4: layer4,
|
||||
}
|
||||
|
||||
return newNeuralNetworkObject, nil
|
||||
}
|
||||
|
||||
// GetLociInfoFromInputLayer counts the loci described by a neural network input layer.
//
// Each rsID (locus) is represented by 3 consecutive neurons:
//	1. LocusExists/LocusIsPhased
//		-0 = Locus value is unknown
//		-0.5 = Locus is known, phase is unknown
//		-1 = Locus is known, phase is known
//	2. Allele1 Value
//	3. Allele2 Value
// Each rsID's neurons are concatenated together to form the inputLayer.
//
//Outputs:
//	-int: Number of loci values that are known
//	-int: Number of loci values that are known and phased
//	-int: Number of loci
//	-error: Returned if the layer length is not a multiple of 3, or if no loci values are known
func GetLociInfoFromInputLayer(inputLayer []float32)(int, int, int, error){

	inputLayerLength := len(inputLayer)

	if (inputLayerLength%3 != 0){
		return 0, 0, 0, errors.New("GetLociInfoFromInputLayer called with invalid length input layer: Not evenly divisible by 3.")
	}

	numberOfLoci := inputLayerLength/3

	numberOfLociValuesThatAreKnown := 0
	numberOfLociValuesThatAreKnownAndPhased := 0

	// Only the first neuron of each locus triplet describes if the locus is known/phased,
	// so we step through the layer 3 neurons at a time.
	for index := 0; index < inputLayerLength; index += 3{

		locusStatusNeuronValue := inputLayer[index]

		if (locusStatusNeuronValue == 0){
			// Locus value is unknown
			continue
		}

		// Each known locus is counted exactly once, regardless of whether it is phased
		numberOfLociValuesThatAreKnown += 1

		// We use an inequality instead of ==1 because floats are imprecise
		if (locusStatusNeuronValue > 0.99){
			numberOfLociValuesThatAreKnownAndPhased += 1
		}
	}

	if (numberOfLociValuesThatAreKnown == 0){
		return 0, 0, 0, errors.New("GetLociInfoFromInputLayer called with input layer with no known loci values.")
	}

	return numberOfLociValuesThatAreKnown, numberOfLociValuesThatAreKnownAndPhased, numberOfLoci, nil
}
|
||||
|
||||
// This function returns which outcome is being described from a neural network's final output layer
|
||||
// Outputs:
|
||||
// -string: Output Name (Example: "Blue")
|
||||
// -error
|
||||
func GetOutcomeNameFromOutputLayer(traitName string, verifyOutputLayer bool, outputLayer []float32)(string, error){
|
||||
|
||||
if (verifyOutputLayer == true){
|
||||
|
||||
// We make sure all neurons sum to 1
|
||||
|
||||
summedNeurons := float32(0)
|
||||
|
||||
for _, neuronValue := range outputLayer{
|
||||
summedNeurons += neuronValue
|
||||
}
|
||||
|
||||
// We allow a small amount of inaccuracy due to the imprecise nature of floats.
|
||||
if (summedNeurons > 1.1 || summedNeurons < .99){
|
||||
summedNeuronsString := helpers.ConvertFloat32ToString(summedNeurons)
|
||||
return "", errors.New("GetOutcomeNameFromOutputLayer called with layer containing neuron values which don't sum to 1: " + summedNeuronsString)
|
||||
}
|
||||
}
|
||||
|
||||
getBiggestNeuronIndex := func()int{
|
||||
|
||||
biggestNeuronValue := float32(0)
|
||||
biggestNeuronIndex := 0
|
||||
|
||||
for index, neuronValue := range outputLayer{
|
||||
|
||||
if (index == 0){
|
||||
biggestNeuronValue = neuronValue
|
||||
} else {
|
||||
|
||||
if (neuronValue > biggestNeuronValue){
|
||||
biggestNeuronValue = neuronValue
|
||||
biggestNeuronIndex = index
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return biggestNeuronIndex
|
||||
}
|
||||
|
||||
biggestNeuronIndex := getBiggestNeuronIndex()
|
||||
|
||||
switch traitName{
|
||||
|
||||
case "Eye Color":{
|
||||
|
||||
if (len(outputLayer) != 4){
|
||||
return "", errors.New("GetOutcomeNameFromOutputLayer called with invalid length output layer.")
|
||||
}
|
||||
|
||||
switch biggestNeuronIndex{
|
||||
case 0:{
|
||||
return "Blue", nil
|
||||
}
|
||||
case 1:{
|
||||
return "Green", nil
|
||||
}
|
||||
case 2:{
|
||||
return "Hazel", nil
|
||||
}
|
||||
case 3:{
|
||||
return "Brown", nil
|
||||
}
|
||||
}
|
||||
}
|
||||
case "Lactose Tolerance":{
|
||||
|
||||
if (len(outputLayer) != 2){
|
||||
return "", errors.New("GetOutcomeNameFromOutputLayer called with invalid length output layer.")
|
||||
}
|
||||
|
||||
switch biggestNeuronIndex{
|
||||
case 0:{
|
||||
return "Tolerant", nil
|
||||
}
|
||||
case 1:{
|
||||
return "Intolerant", nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "", errors.New("GetOutcomeNameFromOutputLayer called with unknown traitName: " + traitName)
|
||||
}
|
||||
|
||||
|
||||
//Outputs:
|
||||
// -int: Layer 1 neuron count (input layer)
|
||||
// -int: Layer 2 neuron count
|
||||
// -int: Layer 3 neuron count
|
||||
// -int: Layer 4 neuron count
|
||||
// -int: Layer 5 neuron count (output layer)
|
||||
// -int: Layer 4 neuron count (output layer)
|
||||
// -error
|
||||
func getNeuralNetworkLayerSizes(traitName string)(int, int, int, int, int, error){
|
||||
func getNeuralNetworkLayerSizes(traitName string)(int, int, int, int, error){
|
||||
|
||||
switch traitName{
|
||||
|
||||
case "Eye Color":{
|
||||
|
||||
// There are 376 input neurons
|
||||
// There are 282 input neurons
|
||||
// There are 4 output neurons, each representing a color
|
||||
// There are 4 colors: Blue, Green, Hazel, Brown
|
||||
|
||||
return 376, 200, 100, 50, 4, nil
|
||||
return 282, 100, 50, 4, nil
|
||||
}
|
||||
case "Lactose Tolerance":{
|
||||
|
||||
// There are 6 input neurons
|
||||
// There are 2 output neurons, each representing a tolerance: Tolerant, Intolerant
|
||||
return 6, 4, 3, 2, nil
|
||||
}
|
||||
}
|
||||
|
||||
return 0, 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown traitName: " + traitName)
|
||||
return 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown traitName: " + traitName)
|
||||
}
|
||||
|
||||
//This function converts a genome allele to a neuron to use in a tensor
|
||||
// A value of 0 means that the allele is unknown
|
||||
func convertAlleleToNeuron(allele string)(float32, error){
|
||||
|
||||
switch allele{
|
||||
|
||||
case "C":{
|
||||
|
||||
return 0, nil
|
||||
return 0.16, nil
|
||||
}
|
||||
case "A":{
|
||||
|
||||
return 0.2, nil
|
||||
return 0.32, nil
|
||||
}
|
||||
case "T":{
|
||||
|
||||
return 0.4, nil
|
||||
return 0.48, nil
|
||||
}
|
||||
case "G":{
|
||||
|
||||
return 0.6, nil
|
||||
return 0.64, nil
|
||||
}
|
||||
case "I":{
|
||||
|
||||
|
@ -295,7 +429,7 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
userPhenotypeDataObject readBiobankData.PhenotypeData_OpenSNP,
|
||||
userLocusValuesMap map[int64]locusValue.LocusValue)(bool, []TrainingData, error){
|
||||
|
||||
if (traitName != "Eye Color"){
|
||||
if (traitName != "Eye Color" && traitName != "Lactose Tolerance"){
|
||||
return false, nil, errors.New("CreateGeneticPredictionTrainingData_OpenSNP called with unknown traitName: " + traitName)
|
||||
}
|
||||
|
||||
|
@ -315,11 +449,15 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
// Each TrainingData holds a variation of the user's genome rsID values
|
||||
// We add many rows with withheld data to improve training data
|
||||
|
||||
numberOfInputLayerRows, _, _, _, numberOfOutputLayerRows, err := getNeuralNetworkLayerSizes(traitName)
|
||||
numberOfInputLayerRows, _, _, numberOfOutputLayerRows, err := getNeuralNetworkLayerSizes(traitName)
|
||||
if (err != nil) { return false, nil, err }
|
||||
|
||||
// Each rsID is represented by 4 neurons: LocusExists, LocusIsPhased, Allele1 Value, Allele2 Value
|
||||
expectedNumberOfInputLayerRows := len(traitRSIDs) * 4
|
||||
// Each rsID is represented by 3 neurons: LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value
|
||||
// The LocusExists/LocusIsPhased neuron stores information like so:
|
||||
// -0 = Locus value is unknown
|
||||
// -0.5 = Locus Is known, phase is unknown
|
||||
// -1 = Locus Is Known, phase is known
|
||||
expectedNumberOfInputLayerRows := len(traitRSIDs) * 3
|
||||
|
||||
if (numberOfInputLayerRows != expectedNumberOfInputLayerRows){
|
||||
|
||||
|
@ -365,33 +503,52 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
// -error
|
||||
getUserTraitValueNeurons := func()(bool, []float32, error){
|
||||
|
||||
if (traitName == "Eye Color"){
|
||||
switch traitName{
|
||||
|
||||
userEyeColorIsKnown := userPhenotypeDataObject.EyeColorIsKnown
|
||||
if (userEyeColorIsKnown == false){
|
||||
return false, nil, nil
|
||||
case "Eye Color":{
|
||||
|
||||
userEyeColorIsKnown := userPhenotypeDataObject.EyeColorIsKnown
|
||||
if (userEyeColorIsKnown == false){
|
||||
return false, nil, nil
|
||||
}
|
||||
|
||||
userEyeColor := userPhenotypeDataObject.EyeColor
|
||||
|
||||
if (userEyeColor == "Blue"){
|
||||
|
||||
return true, []float32{1, 0, 0, 0}, nil
|
||||
|
||||
} else if (userEyeColor == "Green"){
|
||||
|
||||
return true, []float32{0, 1, 0, 0}, nil
|
||||
|
||||
} else if (userEyeColor == "Hazel"){
|
||||
|
||||
return true, []float32{0, 0, 1, 0}, nil
|
||||
|
||||
} else if (userEyeColor == "Brown"){
|
||||
|
||||
return true, []float32{0, 0, 0, 1}, nil
|
||||
}
|
||||
|
||||
return false, nil, errors.New("Malformed userPhenotypeDataObject: Invalid eyeColor: " + userEyeColor)
|
||||
}
|
||||
case "Lactose Tolerance":{
|
||||
|
||||
userEyeColor := userPhenotypeDataObject.EyeColor
|
||||
userLactoseToleranceIsKnown := userPhenotypeDataObject.LactoseToleranceIsKnown
|
||||
if (userLactoseToleranceIsKnown == false){
|
||||
return false, nil, nil
|
||||
}
|
||||
|
||||
if (userEyeColor == "Blue"){
|
||||
userLactoseTolerance := userPhenotypeDataObject.LactoseTolerance
|
||||
|
||||
return true, []float32{1, 0, 0, 0}, nil
|
||||
if (userLactoseTolerance == true){
|
||||
|
||||
} else if (userEyeColor == "Green"){
|
||||
return true, []float32{1, 0}, nil
|
||||
}
|
||||
|
||||
return true, []float32{0, 1, 0, 0}, nil
|
||||
|
||||
} else if (userEyeColor == "Hazel"){
|
||||
|
||||
return true, []float32{0, 0, 1, 0}, nil
|
||||
|
||||
} else if (userEyeColor == "Brown"){
|
||||
|
||||
return true, []float32{0, 0, 0, 1}, nil
|
||||
return true, []float32{0, 1}, nil
|
||||
}
|
||||
|
||||
return false, nil, errors.New("Malformed userPhenotypeDataObject: Invalid eyeColor: " + userEyeColor)
|
||||
}
|
||||
|
||||
return false, nil, errors.New("Unknown traitName: " + traitName)
|
||||
|
@ -409,6 +566,12 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
return false, nil, errors.New("getUserTraitValueNeurons returning invalid length layer slice.")
|
||||
}
|
||||
|
||||
// We want the initial training data to be the same for each call of this function that has the same input parameters
|
||||
// This is a necessary step so our neural network models will be reproducible
|
||||
// Reproducible means that other people can run the code and produce the same models, byte-for-byte
|
||||
|
||||
pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))
|
||||
|
||||
// We create 110 examples per user.
|
||||
// We randomize allele order whenever phase for the locus is unknown
|
||||
// 50% of the time we randomize allele order even when phase is known to train the model on unphased data
|
||||
|
@ -462,33 +625,41 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
|
||||
probabilityOfUsingLoci := getProbabilityOfUsingLoci()
|
||||
|
||||
// In the inputLayer, each locus value is represented by 4 neurons:
|
||||
// 1. LocusExists (Either 0 or 1)
|
||||
// 2. LocusIsPhased (Either 0 or 1)
|
||||
// 3. Allele1 Locus Value (Value between 0-1)
|
||||
// 4. Allele2 Locus Value (Value between 0-1)
|
||||
// In the inputLayer, each locus value is represented by 3 neurons:
|
||||
// 1. LocusExists/LocusIsPhased
|
||||
// -0 = Locus value is unknown
|
||||
// -0.5 = Locus Is known, phase is unknown
|
||||
// -1 = Locus Is Known, phase is known
|
||||
// 2. Allele1 Locus Value (Value between 0-1)
|
||||
// -0 = Value is unknown
|
||||
// 3. Allele2 Locus Value (Value between 0-1)
|
||||
// -0 = Value is unknown
|
||||
|
||||
inputLayerLength := len(traitRSIDsList) * 4
|
||||
anyLocusExists := false
|
||||
|
||||
inputLayerLength := len(traitRSIDsList) * 3
|
||||
|
||||
inputLayer := make([]float32, 0, inputLayerLength)
|
||||
|
||||
for _, rsID := range traitRSIDsList{
|
||||
|
||||
useLocusBool, err := helpers.GetRandomBoolWithProbability(probabilityOfUsingLoci)
|
||||
if (err != nil) { return false, nil, err }
|
||||
if (useLocusBool == false){
|
||||
randomFloat := pseudorandomNumberGenerator.Float64()
|
||||
if (randomFloat > probabilityOfUsingLoci){
|
||||
// This if statement has a (1 - probabilityOfUsingLoci) chance of being true.
|
||||
// We are skipping this locus
|
||||
inputLayer = append(inputLayer, 0, 0, 0, 0)
|
||||
inputLayer = append(inputLayer, 0, 0, 0)
|
||||
continue
|
||||
}
|
||||
|
||||
userLocusValue, exists := userLocusValuesMap[rsID]
|
||||
if (exists == false){
|
||||
// This user's locus value is unknown
|
||||
inputLayer = append(inputLayer, 0, 0, 0, 0)
|
||||
inputLayer = append(inputLayer, 0, 0, 0)
|
||||
continue
|
||||
}
|
||||
|
||||
anyLocusExists = true
|
||||
|
||||
getLocusAlleles := func()(string, string){
|
||||
|
||||
locusAllele1 := userLocusValue.Base1Value
|
||||
|
@ -498,9 +669,11 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
return locusAllele1, locusAllele2
|
||||
}
|
||||
|
||||
randomBool := helpers.GetRandomBool()
|
||||
// We randomize the phase of the locus
|
||||
|
||||
if (randomBool == false){
|
||||
randomNumber := pseudorandomNumberGenerator.IntN(2)
|
||||
if (randomNumber == 1){
|
||||
// This has a 50% chance of being true.
|
||||
return locusAllele1, locusAllele2
|
||||
}
|
||||
|
||||
|
@ -514,10 +687,16 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
locusAllele2NeuronValue, err := convertAlleleToNeuron(locusAllele2)
|
||||
if (err != nil) { return false, nil, err }
|
||||
|
||||
getLocusIsPhasedNeuronValue := func()float32{
|
||||
getLocusIsKnownAndPhasedNeuronValue := func()float32{
|
||||
|
||||
if (locusAllele1 == locusAllele2){
|
||||
// Phase of locus must be known.
|
||||
// Swapping the alleles would change nothing.
|
||||
return 1
|
||||
}
|
||||
|
||||
if (randomizePhaseBool == true){
|
||||
return 0
|
||||
return 0.5
|
||||
}
|
||||
|
||||
locusIsPhased := userLocusValue.LocusIsPhased
|
||||
|
@ -525,12 +704,18 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
return 1
|
||||
}
|
||||
|
||||
return 0
|
||||
return 0.5
|
||||
}
|
||||
|
||||
locusIsPhasedNeuronValue := getLocusIsPhasedNeuronValue()
|
||||
locusIsKnownAndPhasedNeuronValue := getLocusIsKnownAndPhasedNeuronValue()
|
||||
|
||||
inputLayer = append(inputLayer, 1, locusIsPhasedNeuronValue, locusAllele1NeuronValue, locusAllele2NeuronValue)
|
||||
inputLayer = append(inputLayer, locusIsKnownAndPhasedNeuronValue, locusAllele1NeuronValue, locusAllele2NeuronValue)
|
||||
}
|
||||
|
||||
if (anyLocusExists == false){
|
||||
// We have 0 known loci for this training example.
|
||||
// We won't add it to the training data.
|
||||
continue
|
||||
}
|
||||
|
||||
userTraitValueNeuronsCopy := slices.Clone(userTraitValueNeurons)
|
||||
|
@ -548,7 +733,7 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
|
||||
func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error){
|
||||
|
||||
layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, layer5NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
|
||||
layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
|
||||
if (err != nil) { return nil, err }
|
||||
|
||||
// This is the graph object we add each layer to
|
||||
|
@ -572,8 +757,12 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error)
|
|||
|
||||
for i:=0; i < totalNumberOfNeurons; i++{
|
||||
|
||||
// This returns a pseudo-random number between 0 and 1
|
||||
newWeight := pseudorandomNumberGenerator.Float32()
|
||||
// We initialize the weights with He initialization
|
||||
// He initialization = (0 +/- sqrt(2/n) where n is the number of nodes in the prior layer)
|
||||
|
||||
// pseudorandomNumberGenerator.Float32() returns a pseudo-random number between 0 and 1
|
||||
|
||||
newWeight := ((pseudorandomNumberGenerator.Float32()-0.5)*2) * float32(math.Sqrt(float64(2)/float64(layerNeuronRows)))
|
||||
|
||||
layerInitialWeightsList = append(layerInitialWeightsList, newWeight)
|
||||
}
|
||||
|
@ -594,7 +783,6 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error)
|
|||
layer1 := getNewNeuralNetworkLayerWeights("Weights1", layer1NeuronCount, layer2NeuronCount)
|
||||
layer2 := getNewNeuralNetworkLayerWeights("Weights2", layer2NeuronCount, layer3NeuronCount)
|
||||
layer3 := getNewNeuralNetworkLayerWeights("Weights3", layer3NeuronCount, layer4NeuronCount)
|
||||
layer4 := getNewNeuralNetworkLayerWeights("Weights4", layer4NeuronCount, layer5NeuronCount)
|
||||
|
||||
newNeuralNetworkObject := NeuralNetwork{
|
||||
|
||||
|
@ -603,7 +791,6 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error)
|
|||
weights1: layer1,
|
||||
weights2: layer2,
|
||||
weights3: layer3,
|
||||
weights4: layer4,
|
||||
}
|
||||
|
||||
return &newNeuralNetworkObject, nil
|
||||
|
@ -616,75 +803,68 @@ func (inputNetwork *NeuralNetwork)getLearnables()gorgonia.Nodes{
|
|||
weights1 := inputNetwork.weights1
|
||||
weights2 := inputNetwork.weights2
|
||||
weights3 := inputNetwork.weights3
|
||||
weights4 := inputNetwork.weights4
|
||||
|
||||
result := gorgonia.Nodes{weights1, weights2, weights3, weights4}
|
||||
result := gorgonia.Nodes{weights1, weights2, weights3}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
|
||||
// This function will train the neural network
|
||||
// The function is passed a single TrainingData example to train on
|
||||
//
|
||||
// TODO: This function doesn't work
|
||||
// The weights do not change during training
|
||||
// I think the layer dimensions are wrong?
|
||||
//
|
||||
func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, trainingData TrainingData)error{
|
||||
// The function is passed a batch of TrainingData examples to train on
|
||||
// Inputs:
|
||||
// -string: Trait Name
|
||||
// -*NeuralNetwork
|
||||
// -func()(bool, bool, TrainingData, error): Function to get the next training data.
|
||||
// -Outputs:
|
||||
// -bool: User stopped the training run
|
||||
// -bool: Another training data exists
|
||||
// -TrainingData: The next training data example
|
||||
// -error
|
||||
// Outputs:
|
||||
// -bool: Process completed (was not stopped mid-way)
|
||||
// -error
|
||||
func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, getNextTrainingData func()(bool, bool, TrainingData, error))(bool, error){
|
||||
|
||||
layer1NeuronCount, _, _, _, layer5NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
|
||||
if (err != nil) { return err }
|
||||
layer1NeuronCount, _, _, layer4NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
|
||||
if (err != nil) { return false, err }
|
||||
|
||||
neuralNetworkGraph := neuralNetworkObject.graph
|
||||
|
||||
// This inputLayer contains the allele values for this training example
|
||||
trainingDataInputLayer := trainingData.InputLayer
|
||||
|
||||
// This outputLayer contains the phenotype for this training example (example: Eye color of Blue)
|
||||
trainingDataOutputLayer := trainingData.OutputLayer
|
||||
|
||||
// We convert our inputTensor and outputTensor to the type *Node
|
||||
|
||||
inputTensorShapeObject := tensor.WithShape(1, layer1NeuronCount)
|
||||
outputTensorShapeObject := tensor.WithShape(1, layer5NeuronCount)
|
||||
|
||||
inputTensorBacking := tensor.WithBacking(trainingDataInputLayer)
|
||||
outputTensorBacking := tensor.WithBacking(trainingDataOutputLayer)
|
||||
|
||||
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
|
||||
outputTensor := tensor.New(outputTensorShapeObject, outputTensorBacking)
|
||||
// We first create the input and output nodes
|
||||
// They don't have any values yet.
|
||||
|
||||
trainingDataInputNode := gorgonia.NewMatrix(neuralNetworkGraph,
|
||||
tensor.Float32,
|
||||
gorgonia.WithName("input"),
|
||||
gorgonia.WithName("Input"),
|
||||
gorgonia.WithShape(1, layer1NeuronCount),
|
||||
gorgonia.WithValue(inputTensor),
|
||||
)
|
||||
|
||||
trainingDataOutputNode := gorgonia.NewMatrix(neuralNetworkGraph,
|
||||
trainingDataExpectedOutputNode := gorgonia.NewMatrix(neuralNetworkGraph,
|
||||
tensor.Float32,
|
||||
gorgonia.WithName("expectedOutput"),
|
||||
gorgonia.WithShape(1, layer5NeuronCount),
|
||||
gorgonia.WithValue(outputTensor),
|
||||
gorgonia.WithName("ExpectedOutput"),
|
||||
gorgonia.WithShape(1, layer4NeuronCount),
|
||||
)
|
||||
|
||||
err = neuralNetworkObject.prepareToComputePrediction(trainingDataInputNode)
|
||||
if (err != nil) { return err }
|
||||
err = neuralNetworkObject.buildNeuralNetwork(trainingDataInputNode)
|
||||
if (err != nil) { return false, err }
|
||||
|
||||
// This computes the loss (how accurate was our prediction)
|
||||
losses, err := gorgonia.Sub(trainingDataOutputNode, neuralNetworkObject.prediction)
|
||||
if (err != nil) { return err }
|
||||
losses, err := gorgonia.Sub(trainingDataExpectedOutputNode, neuralNetworkObject.prediction)
|
||||
if (err != nil) { return false, err }
|
||||
|
||||
// Cost is an average of the losses
|
||||
cost, err := gorgonia.Mean(losses)
|
||||
if (err != nil) { return err }
|
||||
squareOfLosses, err := gorgonia.Square(losses)
|
||||
if (err != nil) { return false, err }
|
||||
|
||||
// Cost is an average of the square of losses
|
||||
cost, err := gorgonia.Mean(squareOfLosses)
|
||||
if (err != nil) { return false, err }
|
||||
|
||||
neuralNetworkLearnables := neuralNetworkObject.getLearnables()
|
||||
|
||||
// Grad takes a scalar cost node and a list of with-regards-to, and returns the gradient
|
||||
_, err = gorgonia.Grad(cost, neuralNetworkLearnables...)
|
||||
if (err != nil) { return err }
|
||||
if (err != nil) { return false, err }
|
||||
|
||||
bindDualValues := gorgonia.BindDualValues(neuralNetworkLearnables...)
|
||||
|
||||
|
@ -692,29 +872,69 @@ func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, tr
|
|||
virtualMachine := gorgonia.NewTapeMachine(neuralNetworkGraph, bindDualValues)
|
||||
|
||||
// This is the learn rate or step size for the solver.
|
||||
learningRate := gorgonia.WithLearnRate(.001)
|
||||
learningRate := gorgonia.WithLearnRate(.01)
|
||||
|
||||
// This clips the gradient if it gets too crazy
|
||||
//gradientClip := gorgonia.WithClip(5)
|
||||
// gradientClip := gorgonia.WithClip(.05)
|
||||
|
||||
solver := gorgonia.NewVanillaSolver(learningRate)
|
||||
//solver := gorgonia.NewVanillaSolver(learningRate, gradientClip)
|
||||
// solver := gorgonia.NewVanillaSolver(learningRate, gradientClip)
|
||||
defer virtualMachine.Close()
|
||||
|
||||
for i:=0; i < 10; i++{
|
||||
for {
|
||||
|
||||
err = virtualMachine.RunAll()
|
||||
if (err != nil) { return err }
|
||||
userStoppedTraining, nextDataExists, trainingDataObject, err := getNextTrainingData()
|
||||
if (err != nil) { return false, err }
|
||||
if (userStoppedTraining == true){
|
||||
// User manually stopped the training run
|
||||
return false, nil
|
||||
}
|
||||
if (nextDataExists == false){
|
||||
// We are done training
|
||||
break
|
||||
}
|
||||
|
||||
// NodesToValueGrads is a utility function that converts a Nodes to a slice of ValueGrad for the solver
|
||||
valueGrads := gorgonia.NodesToValueGrads(neuralNetworkLearnables)
|
||||
// We convert our input training data slices to the type *Dense and assign them to our nodes
|
||||
|
||||
err := solver.Step(valueGrads)
|
||||
if (err != nil) { return err }
|
||||
// This inputLayer contains the allele values for this training example
|
||||
trainingDataInputLayer := trainingDataObject.InputLayer
|
||||
|
||||
virtualMachine.Reset()
|
||||
// This outputLayer contains the phenotype for this training example (example: Eye color of Blue)
|
||||
trainingDataOutputLayer := trainingDataObject.OutputLayer
|
||||
|
||||
inputTensorShapeObject := tensor.WithShape(1, layer1NeuronCount)
|
||||
outputTensorShapeObject := tensor.WithShape(1, layer4NeuronCount)
|
||||
|
||||
inputTensorBacking := tensor.WithBacking(trainingDataInputLayer)
|
||||
outputTensorBacking := tensor.WithBacking(trainingDataOutputLayer)
|
||||
|
||||
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
|
||||
outputTensor := tensor.New(outputTensorShapeObject, outputTensorBacking)
|
||||
|
||||
err = gorgonia.Let(trainingDataInputNode, inputTensor)
|
||||
if (err != nil) { return false, err }
|
||||
|
||||
err = gorgonia.Let(trainingDataExpectedOutputNode, outputTensor)
|
||||
if (err != nil) { return false, err }
|
||||
|
||||
// for i:=0; i < 10; i++{
|
||||
|
||||
err = virtualMachine.RunAll()
|
||||
if (err != nil) { return false, err }
|
||||
|
||||
// NodesToValueGrads is a utility function that converts a Nodes to a slice of ValueGrad for the solver
|
||||
valueGrads := gorgonia.NodesToValueGrads(neuralNetworkLearnables)
|
||||
|
||||
err = solver.Step(valueGrads)
|
||||
if (err != nil) { return false, err }
|
||||
|
||||
virtualMachine.Reset()
|
||||
// }
|
||||
|
||||
// log.Println(cost.Value())
|
||||
}
|
||||
|
||||
return nil
|
||||
return true, nil
|
||||
}
|
||||
|
||||
|
||||
|
@ -726,28 +946,29 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer
|
|||
|
||||
neuralNetworkGraph := inputNeuralNetwork.graph
|
||||
|
||||
// We convert the inputLayer []float32 to a node object
|
||||
|
||||
numberOfInputNeurons := len(inputLayer)
|
||||
|
||||
inputNode := gorgonia.NewMatrix(neuralNetworkGraph,
|
||||
tensor.Float32,
|
||||
gorgonia.WithName("Input"),
|
||||
gorgonia.WithShape(1, numberOfInputNeurons),
|
||||
)
|
||||
|
||||
// We convert the inputLayer []float32 to a tensor *Dense object
|
||||
|
||||
inputTensorShapeObject := tensor.WithShape(1, numberOfInputNeurons)
|
||||
|
||||
inputTensorBacking := tensor.WithBacking(inputLayer)
|
||||
|
||||
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
|
||||
|
||||
inputNode := gorgonia.NewMatrix(neuralNetworkGraph,
|
||||
tensor.Float32,
|
||||
gorgonia.WithName("input"),
|
||||
gorgonia.WithShape(1, numberOfInputNeurons),
|
||||
gorgonia.WithValue(inputTensor),
|
||||
)
|
||||
err := gorgonia.Let(inputNode, inputTensor)
|
||||
if (err != nil) { return nil, err }
|
||||
|
||||
err := inputNeuralNetwork.prepareToComputePrediction(inputNode)
|
||||
|
||||
err = inputNeuralNetwork.buildNeuralNetwork(inputNode)
|
||||
if (err != nil){ return nil, err }
|
||||
|
||||
prediction := inputNeuralNetwork.prediction
|
||||
|
||||
// Now we create a virtual machine to compute the prediction
|
||||
|
||||
neuralNetworkLearnables := inputNeuralNetwork.getLearnables()
|
||||
|
@ -759,26 +980,25 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer
|
|||
err = virtualMachine.RunAll()
|
||||
if (err != nil) { return nil, err }
|
||||
|
||||
prediction := inputNeuralNetwork.prediction
|
||||
|
||||
predictionValues := prediction.Value().Data().([]float32)
|
||||
|
||||
return predictionValues, nil
|
||||
}
|
||||
|
||||
|
||||
// This function will take a neural network and input layer and prepare the network to compute a prediction
|
||||
// We still need to run a virtual machine after calling this function in order for the prediction to be generated
|
||||
func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgonia.Node)error{
|
||||
// This function will take a neural network and input layer and build the network to be able to compute a prediction
|
||||
// We need to run a virtual machine after calling this function in order for the prediction to be generated
|
||||
func (inputNetwork *NeuralNetwork)buildNeuralNetwork(inputLayer *gorgonia.Node)error{
|
||||
|
||||
// We copy pointer (says to do this in a resource i'm reading)
|
||||
// We copy node pointer (says to do this in a resource i'm reading)
|
||||
|
||||
inputLayerCopy := inputLayer
|
||||
|
||||
// We multiply weights at each layer and perform rectification (ReLU) after each multiplication
|
||||
// We multiply weights at each layer and perform rectification (ReLU) after each hidden layer multiplication
|
||||
|
||||
weights1 := inputNetwork.weights1
|
||||
weights2 := inputNetwork.weights2
|
||||
weights3 := inputNetwork.weights3
|
||||
weights4 := inputNetwork.weights4
|
||||
|
||||
layer1Product, err := gorgonia.Mul(inputLayerCopy, weights1)
|
||||
if (err != nil) {
|
||||
|
@ -787,9 +1007,11 @@ func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgoni
|
|||
|
||||
layer1ProductRectified, err := gorgonia.Rectify(layer1Product)
|
||||
if (err != nil){
|
||||
return errors.New("Layer 1 rectification failed: " + err.Error())
|
||||
return errors.New("Layer 1 Rectify failed: " + err.Error())
|
||||
}
|
||||
|
||||
weights2 := inputNetwork.weights2
|
||||
|
||||
layer2Product, err := gorgonia.Mul(layer1ProductRectified, weights2)
|
||||
if (err != nil) {
|
||||
return errors.New("Layer 2 multiplication failed: " + err.Error())
|
||||
|
@ -797,35 +1019,21 @@ func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgoni
|
|||
|
||||
layer2ProductRectified, err := gorgonia.Rectify(layer2Product)
|
||||
if (err != nil){
|
||||
return errors.New("Layer 2 rectification failed: " + err.Error())
|
||||
return errors.New("Layer 2 Rectify failed: " + err.Error())
|
||||
}
|
||||
|
||||
weights3 := inputNetwork.weights3
|
||||
|
||||
layer3Product, err := gorgonia.Mul(layer2ProductRectified, weights3)
|
||||
if (err != nil) {
|
||||
return errors.New("Layer 3 multiplication failed: " + err.Error())
|
||||
}
|
||||
|
||||
layer3ProductRectified, err := gorgonia.Rectify(layer3Product)
|
||||
if (err != nil){
|
||||
return errors.New("Layer 3 rectification failed: " + err.Error())
|
||||
}
|
||||
// We SoftMax the output to get the prediction
|
||||
|
||||
layer4Product, err := gorgonia.Mul(layer3ProductRectified, weights4)
|
||||
prediction, err := gorgonia.SoftMax(layer3Product)
|
||||
if (err != nil) {
|
||||
return errors.New("Layer 4 multiplication failed: " + err.Error())
|
||||
}
|
||||
|
||||
layer4ProductRectified, err := gorgonia.Rectify(layer4Product)
|
||||
if (err != nil){
|
||||
return errors.New("Layer 4 rectification failed: " + err.Error())
|
||||
}
|
||||
|
||||
// We sigmoid the output to get the prediction
|
||||
//TODO: Use SoftMax instead?
|
||||
|
||||
prediction, err := gorgonia.Sigmoid(layer4ProductRectified)
|
||||
if (err != nil) {
|
||||
return errors.New("Sigmoid failed: " + err.Error())
|
||||
return errors.New("SoftMax failed: " + err.Error())
|
||||
}
|
||||
|
||||
inputNetwork.prediction = prediction
|
||||
|
|
|
@ -120,8 +120,20 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi
|
|||
locusAllele1 := locusBasePairValue.Allele1
|
||||
locusAllele2 := locusBasePairValue.Allele2
|
||||
|
||||
getLocusIsPhasedBool := func()bool{
|
||||
|
||||
if (locusAllele1 == locusAllele2){
|
||||
// Locus has to be phased, because phase flip does not change value
|
||||
return true
|
||||
}
|
||||
|
||||
return genomeIsPhased
|
||||
}
|
||||
|
||||
locusIsPhased := getLocusIsPhasedBool()
|
||||
|
||||
locusValueObject := locusValue.LocusValue{
|
||||
LocusIsPhased: genomeIsPhased,
|
||||
LocusIsPhased: locusIsPhased,
|
||||
Base1Value: locusAllele1,
|
||||
Base2Value: locusAllele2,
|
||||
}
|
||||
|
@ -466,9 +478,20 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi
|
|||
// The OnlyExcludeConflicts will only omit when there is a tie
|
||||
// The OnlyIncludeShared requires at least 2 to agree
|
||||
|
||||
getLocusIsPhased_OnlyExcludeConflicts := func()bool{
|
||||
if (locusBase1 == locusBase2){
|
||||
// These kinds of loci are always phased, because swapping the alleles changes nothing.
|
||||
return true
|
||||
}
|
||||
|
||||
return phaseIsKnown_OnlyExcludeConflicts
|
||||
}
|
||||
|
||||
locusIsPhased_OnlyExcludeConflicts := getLocusIsPhased_OnlyExcludeConflicts()
|
||||
|
||||
onlyExcludeConflictsLocusValue := locusValue.LocusValue{
|
||||
|
||||
LocusIsPhased: phaseIsKnown_OnlyExcludeConflicts,
|
||||
LocusIsPhased: locusIsPhased_OnlyExcludeConflicts,
|
||||
Base1Value: locusBase1,
|
||||
Base2Value: locusBase2,
|
||||
}
|
||||
|
@ -477,8 +500,19 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi
|
|||
|
||||
if (mostRecordedSortedBasePairCount >= 2){
|
||||
|
||||
getLocusIsPhased_OnlyIncludeShared := func()bool{
|
||||
if (locusBase1 == locusBase2){
|
||||
// These kinds of loci are always phased, because swapping the alleles changes nothing.
|
||||
return true
|
||||
}
|
||||
|
||||
return phaseIsKnown_OnlyIncludeShared
|
||||
}
|
||||
|
||||
locusIsPhased_OnlyIncludeShared := getLocusIsPhased_OnlyIncludeShared()
|
||||
|
||||
onlyIncludeSharedLocusValue := locusValue.LocusValue{
|
||||
LocusIsPhased: phaseIsKnown_OnlyIncludeShared,
|
||||
LocusIsPhased: locusIsPhased_OnlyIncludeShared,
|
||||
Base1Value: locusBase1,
|
||||
Base2Value: locusBase2,
|
||||
}
|
||||
|
|
|
@ -246,6 +246,13 @@ func CeilFloat64ToInt(input float64)(int, error){
|
|||
return ceiledInt, nil
|
||||
}
|
||||
|
||||
// ConvertFloat32ToString formats input in plain decimal notation ('f' format, no exponent)
// with exactly 5 digits after the decimal point.
// The value is widened to float64 because strconv.FormatFloat takes a float64; the bitSize
// argument of 32 tells strconv that the value originated as a float32.
func ConvertFloat32ToString(input float32) string{
|
||||
|
||||
result := strconv.FormatFloat(float64(input), 'f', 5, 32)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func ConvertFloat64ToString(input float64) string{
|
||||
|
||||
result := strconv.FormatFloat(input, 'f', 5, 64)
|
||||
|
|
|
@ -125,7 +125,7 @@ func getEyeColorTraitObject()Trait{
|
|||
TraitDescription: "The color of a person's eyes.",
|
||||
LociList: eyeColorLociList,
|
||||
RulesList: []TraitRule{},
|
||||
OutcomesList: []string{},
|
||||
OutcomesList: []string{"Blue", "Green", "Hazel", "Brown"},
|
||||
References: referencesMap,
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue