Compare commits

...

2 commits

8 changed files with 1000 additions and 444 deletions

View file

@@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log.
## Unversioned Changes
* Improved the Create Genetic Models utility and neural network training code. Models are now able to predict traits with some accuracy. - *Simon Sarasova*
* Improved ReadMe.md. - *Simon Sarasova*
* Improved Seekia's slogan and Whitepaper.md. - *Simon Sarasova*
* Added an Estimated Time Remaining label to 2 processes within the Create Genetic Models utility. - *Simon Sarasova*

View file

@@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th
Name | Date Of First Commit | Number Of Commits
--- | --- | ---
Simon Sarasova | June 13, 2023 | 263
Simon Sarasova | June 13, 2023 | 265

View file

@@ -131,7 +131,7 @@ Get Simon's contact information by visiting his website at [SimonSarasova.eth](i
You can use Brave browser to access a .eth IPFS website.
You can also use an IPFS gateway service if you do not have Brave Browser. These services are operated by third parties, so you can access his website from multiple gateways to make sure you are seeing an authentic version of his website:
You can also use an IPFS gateway service if you do not have Brave Browser. These services are operated by third parties. You can access his website from multiple gateways to ensure you are seeing an authentic version of his website:
[SimonSarasova.eth.limo](https://simonsarasova.eth.limo)

View file

@@ -20,22 +20,23 @@ import "gorgonia.org/gorgonia"
import "gorgonia.org/tensor"
import mathRand "math/rand/v2"
import "math"
import "bytes"
import "encoding/gob"
import "slices"
import "errors"
//import "log"
type NeuralNetwork struct{
// ExprGraph is a data structure for a directed acyclic graph (of expressions).
graph *gorgonia.ExprGraph
graph *gorgonia.ExprGraph
// These are the weights for each layer of neurons
weights1 *gorgonia.Node
weights2 *gorgonia.Node
weights3 *gorgonia.Node
weights4 *gorgonia.Node
// This is the computed prediction
prediction *gorgonia.Node
@@ -97,7 +98,6 @@ type neuralNetworkForEncoding struct{
Weights1 []float32
Weights2 []float32
Weights3 []float32
Weights4 []float32
Weights1Rows int
Weights1Columns int
@@ -105,8 +105,6 @@ type neuralNetworkForEncoding struct{
Weights2Columns int
Weights3Rows int
Weights3Columns int
Weights4Rows int
Weights4Columns int
}
func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte, error){
@@ -114,12 +112,10 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte,
weights1 := inputNeuralNetwork.weights1
weights2 := inputNeuralNetwork.weights2
weights3 := inputNeuralNetwork.weights3
weights4 := inputNeuralNetwork.weights4
weights1Slice := weights1.Value().Data().([]float32)
weights2Slice := weights2.Value().Data().([]float32)
weights3Slice := weights3.Value().Data().([]float32)
weights4Slice := weights4.Value().Data().([]float32)
weights1Rows := weights1.Shape()[0]
weights1Columns := weights1.Shape()[1]
@@ -127,14 +123,11 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte,
weights2Columns := weights2.Shape()[1]
weights3Rows := weights3.Shape()[0]
weights3Columns := weights3.Shape()[1]
weights4Rows := weights4.Shape()[0]
weights4Columns := weights4.Shape()[1]
newNeuralNetworkForEncoding := neuralNetworkForEncoding{
Weights1: weights1Slice,
Weights2: weights2Slice,
Weights3: weights3Slice,
Weights4: weights4Slice,
Weights1Rows: weights1Rows,
Weights1Columns: weights1Columns,
@@ -142,8 +135,6 @@ func EncodeNeuralNetworkObjectToBytes(inputNeuralNetwork NeuralNetwork)([]byte,
Weights2Columns: weights2Columns,
Weights3Rows: weights3Rows,
Weights3Columns: weights3Columns,
Weights4Rows: weights4Rows,
Weights4Columns: weights4Columns,
}
buffer := new(bytes.Buffer)
@@ -176,7 +167,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
weights1 := newNeuralNetworkForEncoding.Weights1
weights2 := newNeuralNetworkForEncoding.Weights2
weights3 := newNeuralNetworkForEncoding.Weights3
weights4 := newNeuralNetworkForEncoding.Weights4
weights1Rows := newNeuralNetworkForEncoding.Weights1Rows
weights1Columns := newNeuralNetworkForEncoding.Weights1Columns
@@ -184,8 +174,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
weights2Columns := newNeuralNetworkForEncoding.Weights2Columns
weights3Rows := newNeuralNetworkForEncoding.Weights3Rows
weights3Columns := newNeuralNetworkForEncoding.Weights3Columns
weights4Rows := newNeuralNetworkForEncoding.Weights4Rows
weights4Columns := newNeuralNetworkForEncoding.Weights4Columns
// This is the graph object we add each layer to
newGraph := gorgonia.NewGraph()
@@ -210,7 +198,6 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
layer1 := getNewNeuralNetworkLayerWeights("Weights1", weights1Rows, weights1Columns, weights1)
layer2 := getNewNeuralNetworkLayerWeights("Weights2", weights2Rows, weights2Columns, weights2)
layer3 := getNewNeuralNetworkLayerWeights("Weights3", weights3Rows, weights3Columns, weights3)
layer4 := getNewNeuralNetworkLayerWeights("Weights4", weights4Rows, weights4Columns, weights4)
newNeuralNetworkObject := NeuralNetwork{
@@ -219,57 +206,204 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
weights1: layer1,
weights2: layer2,
weights3: layer3,
weights4: layer4,
}
return newNeuralNetworkObject, nil
}
//Outputs:
// -int: Number of loci values that are known
// -int: Number of loci values that are known and phased
// -int: Number of loci
// -error
func GetLociInfoFromInputLayer(inputLayer []float32)(int, int, int, error){
// Each input layer has 3 neurons for each locus
// Each rsID (locus) is represented by 3 neurons: LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value
// The LocusExists/LocusIsPhased neuron stores information like so:
// -0 = Locus value is unknown
// -0.5 = Locus Is known, phase is unknown
// -1 = Locus Is Known, phase is known
// Each rsID's neurons are concatenated together to form the inputLayer
inputLayerLength := len(inputLayer)
if (inputLayerLength%3 != 0){
return 0, 0, 0, errors.New("GetInputInfoFromInputLayer called with invalid length input layer: Not evenly divisible by 4.")
}
numberOfLoci := len(inputLayer)/3
numberOfLociValuesThatAreKnown := 0
numberOfLociValuesThatAreKnownAndPhased := 0
for index, neuronValue := range inputLayer{
indexRemainder := index%3
if (indexRemainder == 0){
if (neuronValue == 0){
continue
}
numberOfLociValuesThatAreKnown += 1
// We use an inequality instead of == 1 because floats are imprecise
if (neuronValue > 0.99){
numberOfLociValuesThatAreKnownAndPhased += 1
}
}
}
if (numberOfLociValuesThatAreKnown == 0){
return 0, 0, 0, errors.New("GetInputInfoFromInputLayer called with input layer with no known loci values.")
}
return numberOfLociValuesThatAreKnown, numberOfLociValuesThatAreKnownAndPhased, numberOfLoci, nil
}
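To make the 3-neurons-per-locus encoding above concrete, here is a small illustrative sketch (not part of the diff); the allele neuron values assume the convertAlleleToNeuron mapping defined later in this file (C = 0.16, A = 0.32, T = 0.48, G = 0.64):
// A hypothetical input layer describing 2 loci, 3 neurons per locus:
// -Locus 1: known and phased (1), alleles A and T (0.32, 0.48)
// -Locus 2: known but unphased (0.5), alleles C and G (0.16, 0.64)
exampleInputLayer := []float32{1, 0.32, 0.48, 0.5, 0.16, 0.64}
numberOfKnownLoci, numberOfPhasedLoci, numberOfLoci, err := GetLociInfoFromInputLayer(exampleInputLayer)
if (err != nil) { panic(err) }
// numberOfKnownLoci == 2, numberOfPhasedLoci == 1, numberOfLoci == 2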
// This function returns which outcome is being described from a neural network's final output layer
// Outputs:
// -string: Output Name (Example: "Blue")
// -error
func GetOutcomeNameFromOutputLayer(traitName string, verifyOutputLayer bool, outputLayer []float32)(string, error){
if (verifyOutputLayer == true){
// We make sure all neurons sum to 1
summedNeurons := float32(0)
for _, neuronValue := range outputLayer{
summedNeurons += neuronValue
}
// We allow a small amount of inaccuracy due to the imprecise nature of floats.
if (summedNeurons > 1.1 || summedNeurons < .99){
summedNeuronsString := helpers.ConvertFloat32ToString(summedNeurons)
return "", errors.New("GetOutcomeNameFromOutputLayer called with layer containing neuron values which don't sum to 1: " + summedNeuronsString)
}
}
getBiggestNeuronIndex := func()int{
biggestNeuronValue := float32(0)
biggestNeuronIndex := 0
for index, neuronValue := range outputLayer{
if (index == 0){
biggestNeuronValue = neuronValue
} else {
if (neuronValue > biggestNeuronValue){
biggestNeuronValue = neuronValue
biggestNeuronIndex = index
}
}
}
return biggestNeuronIndex
}
biggestNeuronIndex := getBiggestNeuronIndex()
switch traitName{
case "Eye Color":{
if (len(outputLayer) != 4){
return "", errors.New("GetOutcomeNameFromOutputLayer called with invalid length output layer.")
}
switch biggestNeuronIndex{
case 0:{
return "Blue", nil
}
case 1:{
return "Green", nil
}
case 2:{
return "Hazel", nil
}
case 3:{
return "Brown", nil
}
}
}
case "Lactose Tolerance":{
if (len(outputLayer) != 2){
return "", errors.New("GetOutcomeNameFromOutputLayer called with invalid length output layer.")
}
switch biggestNeuronIndex{
case 0:{
return "Tolerant", nil
}
case 1:{
return "Intolerant", nil
}
}
}
}
return "", errors.New("GetOutcomeNameFromOutputLayer called with unknown traitName: " + traitName)
}
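A brief usage sketch (illustrative, not part of the diff): for the "Eye Color" trait the output layer holds 4 neurons, and the largest neuron selects the outcome name.
exampleOutputLayer := []float32{0.05, 0.15, 0.10, 0.70}
outcomeName, err := GetOutcomeNameFromOutputLayer("Eye Color", true, exampleOutputLayer)
if (err != nil) { panic(err) }
// outcomeName == "Brown": the neurons sum to 1, and index 3 holds the largest value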
//Outputs:
// -int: Layer 1 neuron count (input layer)
// -int: Layer 2 neuron count
// -int: Layer 3 neuron count
// -int: Layer 4 neuron count
// -int: Layer 5 neuron count (output layer)
// -int: Layer 4 neuron count (output layer)
// -error
func getNeuralNetworkLayerSizes(traitName string)(int, int, int, int, int, error){
func getNeuralNetworkLayerSizes(traitName string)(int, int, int, int, error){
switch traitName{
case "Eye Color":{
// There are 376 input neurons
// There are 282 input neurons
// There are 4 output neurons, each representing a color
// There are 4 colors: Blue, Green, Brown, Hazel
return 376, 200, 100, 50, 4, nil
return 282, 100, 50, 4, nil
}
case "Lactose Tolerance":{
// There are 6 input neurons
// There are 2 output neurons, each representing a tolerance: Tolerant, Intolerant
return 6, 4, 3, 2, nil
}
}
return 0, 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown traitName: " + traitName)
return 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown traitName: " + traitName)
}
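For context (not part of the diff): both signatures describe the same 94 eye-color loci. The old 4-neurons-per-locus encoding gave an input layer of 94 × 4 = 376 neurons, and the new 3-neuron encoding gives 94 × 3 = 282. The lactose tolerance trait's 6 input neurons likewise correspond to 2 loci × 3 neurons.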
//This function converts a genome allele to a neuron to use in a tensor
// A value of 0 means that the allele is unknown
func convertAlleleToNeuron(allele string)(float32, error){
switch allele{
case "C":{
return 0, nil
return 0.16, nil
}
case "A":{
return 0.2, nil
return 0.32, nil
}
case "T":{
return 0.4, nil
return 0.48, nil
}
case "G":{
return 0.6, nil
return 0.64, nil
}
case "I":{
@@ -295,7 +429,7 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
userPhenotypeDataObject readBiobankData.PhenotypeData_OpenSNP,
userLocusValuesMap map[int64]locusValue.LocusValue)(bool, []TrainingData, error){
if (traitName != "Eye Color"){
if (traitName != "Eye Color" && traitName != "Lactose Tolerance"){
return false, nil, errors.New("CreateGeneticPredictionTrainingData_OpenSNP called with unknown traitName: " + traitName)
}
@@ -315,11 +449,15 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
// Each TrainingData holds a variation of the user's genome rsID values
// We add many rows with withheld data to improve training data
numberOfInputLayerRows, _, _, _, numberOfOutputLayerRows, err := getNeuralNetworkLayerSizes(traitName)
numberOfInputLayerRows, _, _, numberOfOutputLayerRows, err := getNeuralNetworkLayerSizes(traitName)
if (err != nil) { return false, nil, err }
// Each rsID is represented by 4 neurons: LocusExists, LocusIsPhased, Allele1 Value, Allele2 Value
expectedNumberOfInputLayerRows := len(traitRSIDs) * 4
// Each rsID is represented by 3 neurons: LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value
// The LocusExists/LocusIsPhased neuron stores information like so:
// -0 = Locus value is unknown
// -0.5 = Locus Is known, phase is unknown
// -1 = Locus Is Known, phase is known
expectedNumberOfInputLayerRows := len(traitRSIDs) * 3
if (numberOfInputLayerRows != expectedNumberOfInputLayerRows){
@@ -365,33 +503,52 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
// -error
getUserTraitValueNeurons := func()(bool, []float32, error){
if (traitName == "Eye Color"){
switch traitName{
userEyeColorIsKnown := userPhenotypeDataObject.EyeColorIsKnown
if (userEyeColorIsKnown == false){
return false, nil, nil
case "Eye Color":{
userEyeColorIsKnown := userPhenotypeDataObject.EyeColorIsKnown
if (userEyeColorIsKnown == false){
return false, nil, nil
}
userEyeColor := userPhenotypeDataObject.EyeColor
if (userEyeColor == "Blue"){
return true, []float32{1, 0, 0, 0}, nil
} else if (userEyeColor == "Green"){
return true, []float32{0, 1, 0, 0}, nil
} else if (userEyeColor == "Hazel"){
return true, []float32{0, 0, 1, 0}, nil
} else if (userEyeColor == "Brown"){
return true, []float32{0, 0, 0, 1}, nil
}
return false, nil, errors.New("Malformed userPhenotypeDataObject: Invalid eyeColor: " + userEyeColor)
}
case "Lactose Tolerance":{
userEyeColor := userPhenotypeDataObject.EyeColor
userLactoseToleranceIsKnown := userPhenotypeDataObject.LactoseToleranceIsKnown
if (userLactoseToleranceIsKnown == false){
return false, nil, nil
}
if (userEyeColor == "Blue"){
userLactoseTolerance := userPhenotypeDataObject.LactoseTolerance
return true, []float32{1, 0, 0, 0}, nil
if (userLactoseTolerance == true){
} else if (userEyeColor == "Green"){
return true, []float32{1, 0}, nil
}
return true, []float32{0, 1, 0, 0}, nil
} else if (userEyeColor == "Hazel"){
return true, []float32{0, 0, 1, 0}, nil
} else if (userEyeColor == "Brown"){
return true, []float32{0, 0, 0, 1}, nil
return true, []float32{0, 1}, nil
}
return false, nil, errors.New("Malformed userPhenotypeDataObject: Invalid eyeColor: " + userEyeColor)
}
return false, nil, errors.New("Unknown traitName: " + traitName)
@@ -409,6 +566,12 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
return false, nil, errors.New("getUserTraitValueNeurons returning invalid length layer slice.")
}
// We want the initial training data to be the same for each call of this function that has the same input parameters
// This is a necessary step so our neural network models will be reproducible
// Reproducible means that other people can run the code and produce the same models, byte-for-byte
pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))
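// Illustration (not part of this diff): any two generators seeded with the same PCG constants
// produce identical number streams, which is what makes the models byte-for-byte reproducible:
//
// generatorA := mathRand.New(mathRand.NewPCG(1, 2))
// generatorB := mathRand.New(mathRand.NewPCG(1, 2))
// generatorA.Float64() == generatorB.Float64() // always true, call after call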
// We create 110 examples per user.
// We randomize allele order whenever phase for the locus is unknown
// 50% of the time we randomize allele order even when phase is known to train the model on unphased data
@@ -462,33 +625,41 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
probabilityOfUsingLoci := getProbabilityOfUsingLoci()
// In the inputLayer, each locus value is represented by 4 neurons:
// 1. LocusExists (Either 0 or 1)
// 2. LocusIsPhased (Either 0 or 1)
// 3. Allele1 Locus Value (Value between 0-1)
// 4. Allele2 Locus Value (Value between 0-1)
// In the inputLayer, each locus value is represented by 3 neurons:
// 1. LocusExists/LocusIsPhased
// -0 = Locus value is unknown
// -0.5 = Locus Is known, phase is unknown
// -1 = Locus Is Known, phase is known
// 2. Allele1 Locus Value (Value between 0-1)
// -0 = Value is unknown
// 3. Allele2 Locus Value (Value between 0-1)
// -0 = Value is unknown
inputLayerLength := len(traitRSIDsList) * 4
anyLocusExists := false
inputLayerLength := len(traitRSIDsList) * 3
inputLayer := make([]float32, 0, inputLayerLength)
for _, rsID := range traitRSIDsList{
useLocusBool, err := helpers.GetRandomBoolWithProbability(probabilityOfUsingLoci)
if (err != nil) { return false, nil, err }
if (useLocusBool == false){
randomFloat := pseudorandomNumberGenerator.Float64()
if (randomFloat > probabilityOfUsingLoci){
// This if statement has a (1 - probabilityOfUsingLoci) chance of being true.
// We are skipping this locus
inputLayer = append(inputLayer, 0, 0, 0, 0)
inputLayer = append(inputLayer, 0, 0, 0)
continue
}
userLocusValue, exists := userLocusValuesMap[rsID]
if (exists == false){
// This user's locus value is unknown
inputLayer = append(inputLayer, 0, 0, 0, 0)
inputLayer = append(inputLayer, 0, 0, 0)
continue
}
anyLocusExists = true
getLocusAlleles := func()(string, string){
locusAllele1 := userLocusValue.Base1Value
@@ -498,9 +669,11 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
return locusAllele1, locusAllele2
}
// We randomize the phase of the locus
randomBool := helpers.GetRandomBool()
if (randomBool == false){
randomNumber := pseudorandomNumberGenerator.IntN(2)
if (randomNumber == 1){
// This has a 50% chance of being true.
return locusAllele1, locusAllele2
}
@@ -514,10 +687,16 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
locusAllele2NeuronValue, err := convertAlleleToNeuron(locusAllele2)
if (err != nil) { return false, nil, err }
getLocusIsPhasedNeuronValue := func()float32{
getLocusIsKnownAndPhasedNeuronValue := func()float32{
if (locusAllele1 == locusAllele2){
// Phase of locus must be known.
// Swapping the loci would change nothing.
return 1
}
if (randomizePhaseBool == true){
return 0
return 0.5
}
locusIsPhased := userLocusValue.LocusIsPhased
@@ -525,12 +704,18 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
return 1
}
return 0
return 0.5
}
locusIsPhasedNeuronValue := getLocusIsPhasedNeuronValue()
locusIsKnownAndPhasedNeuronValue := getLocusIsKnownAndPhasedNeuronValue()
inputLayer = append(inputLayer, 1, locusIsPhasedNeuronValue, locusAllele1NeuronValue, locusAllele2NeuronValue)
inputLayer = append(inputLayer, locusIsKnownAndPhasedNeuronValue, locusAllele1NeuronValue, locusAllele2NeuronValue)
}
if (anyLocusExists == false){
// We have 0 known loci for this training example.
// We won't add it to the training data.
continue
}
userTraitValueNeuronsCopy := slices.Clone(userTraitValueNeurons)
@@ -548,7 +733,7 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error){
layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, layer5NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
layer1NeuronCount, layer2NeuronCount, layer3NeuronCount, layer4NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
if (err != nil) { return nil, err }
// This is the graph object we add each layer to
@@ -572,8 +757,12 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error)
for i:=0; i < totalNumberOfNeurons; i++{
// This returns a pseudo-random number between 0 and 1
newWeight := pseudorandomNumberGenerator.Float32()
// We initialize the weights with He initialization
// He initialization = 0 +/- sqrt(2/n), where n is the number of nodes in the prior layer
// pseudorandomNumberGenerator.Float32() returns a pseudo-random number between 0 and 1
newWeight := ((pseudorandomNumberGenerator.Float32()-0.5)*2) * float32(math.Sqrt(float64(2)/float64(layerNeuronRows)))
layerInitialWeightsList = append(layerInitialWeightsList, newWeight)
}
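For reference, here is a self-contained sketch (not part of the diff) of the uniform He-style draw used above; the helper name is hypothetical, while the formula and generator match the new code:
import mathRand "math/rand/v2"
import "math"
// getHeInitializedWeight returns a weight drawn uniformly from (-sqrt(2/n), +sqrt(2/n)),
// where n is the number of nodes in the prior layer.
func getHeInitializedWeight(generator *mathRand.Rand, priorLayerNodeCount int)float32{
	scale := float32(math.Sqrt(float64(2)/float64(priorLayerNodeCount)))
	return ((generator.Float32()-0.5)*2) * scale
}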
@@ -594,7 +783,6 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error)
layer1 := getNewNeuralNetworkLayerWeights("Weights1", layer1NeuronCount, layer2NeuronCount)
layer2 := getNewNeuralNetworkLayerWeights("Weights2", layer2NeuronCount, layer3NeuronCount)
layer3 := getNewNeuralNetworkLayerWeights("Weights3", layer3NeuronCount, layer4NeuronCount)
layer4 := getNewNeuralNetworkLayerWeights("Weights4", layer4NeuronCount, layer5NeuronCount)
newNeuralNetworkObject := NeuralNetwork{
@@ -603,7 +791,6 @@ func GetNewUntrainedNeuralNetworkObject(traitName string)(*NeuralNetwork, error)
weights1: layer1,
weights2: layer2,
weights3: layer3,
weights4: layer4,
}
return &newNeuralNetworkObject, nil
@@ -616,75 +803,68 @@ func (inputNetwork *NeuralNetwork)getLearnables()gorgonia.Nodes{
weights1 := inputNetwork.weights1
weights2 := inputNetwork.weights2
weights3 := inputNetwork.weights3
weights4 := inputNetwork.weights4
result := gorgonia.Nodes{weights1, weights2, weights3, weights4}
result := gorgonia.Nodes{weights1, weights2, weights3}
return result
}
// This function will train the neural network
// The function is passed a single TrainingData example to train on
//
// TODO: This function doesn't work
// The weights do not change during training
// I think the layer dimensions are wrong?
//
func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, trainingData TrainingData)error{
// The function is passed a batch of TrainingData examples to train on
// Inputs:
// -string: Trait Name
// -*NeuralNetwork
// -func()(bool, bool, TrainingData, error): Function to get the next training data.
// -Outputs:
// -bool: User stopped the training run
// -bool: Another training data exists
// -TrainingData: The next training data example
// -error
// Outputs:
// -bool: Process completed (was not stopped mid-way)
// -error
func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, getNextTrainingData func()(bool, bool, TrainingData, error))(bool, error){
layer1NeuronCount, _, _, _, layer5NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
if (err != nil) { return err }
layer1NeuronCount, _, _, layer4NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
if (err != nil) { return false, err }
neuralNetworkGraph := neuralNetworkObject.graph
// This inputLayer contains the allele values for this training example
trainingDataInputLayer := trainingData.InputLayer
// This outputLayer contains the phenotype for this training example (example: Eye color of Blue)
trainingDataOutputLayer := trainingData.OutputLayer
// We convert our inputTensor and outputTensor to the type *Node
inputTensorShapeObject := tensor.WithShape(1, layer1NeuronCount)
outputTensorShapeObject := tensor.WithShape(1, layer5NeuronCount)
inputTensorBacking := tensor.WithBacking(trainingDataInputLayer)
outputTensorBacking := tensor.WithBacking(trainingDataOutputLayer)
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
outputTensor := tensor.New(outputTensorShapeObject, outputTensorBacking)
// We first create the input and output nodes
// They don't have any values yet.
trainingDataInputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("input"),
gorgonia.WithName("Input"),
gorgonia.WithShape(1, layer1NeuronCount),
gorgonia.WithValue(inputTensor),
)
trainingDataOutputNode := gorgonia.NewMatrix(neuralNetworkGraph,
trainingDataExpectedOutputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("expectedOutput"),
gorgonia.WithShape(1, layer5NeuronCount),
gorgonia.WithValue(outputTensor),
gorgonia.WithName("ExpectedOutput"),
gorgonia.WithShape(1, layer4NeuronCount),
)
err = neuralNetworkObject.prepareToComputePrediction(trainingDataInputNode)
if (err != nil) { return err }
err = neuralNetworkObject.buildNeuralNetwork(trainingDataInputNode)
if (err != nil) { return false, err }
// This computes the loss (how accurate was our prediction)
losses, err := gorgonia.Sub(trainingDataOutputNode, neuralNetworkObject.prediction)
if (err != nil) { return err }
losses, err := gorgonia.Sub(trainingDataExpectedOutputNode, neuralNetworkObject.prediction)
if (err != nil) { return false, err }
// Cost is an average of the losses
cost, err := gorgonia.Mean(losses)
if (err != nil) { return err }
squareOfLosses, err := gorgonia.Square(losses)
if (err != nil) { return false, err }
// Cost is an average of the square of losses
cost, err := gorgonia.Mean(squareOfLosses)
if (err != nil) { return false, err }
neuralNetworkLearnables := neuralNetworkObject.getLearnables()
// Grad takes a scalar cost node and a list of with-regards-to, and returns the gradient
_, err = gorgonia.Grad(cost, neuralNetworkLearnables...)
if (err != nil) { return err }
if (err != nil) { return false, err }
bindDualValues := gorgonia.BindDualValues(neuralNetworkLearnables...)
@@ -692,29 +872,69 @@ func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, tr
virtualMachine := gorgonia.NewTapeMachine(neuralNetworkGraph, bindDualValues)
// This is the learn rate or step size for the solver.
learningRate := gorgonia.WithLearnRate(.001)
learningRate := gorgonia.WithLearnRate(.01)
// This clips the gradient if it gets too crazy
//gradientClip := gorgonia.WithClip(5)
// gradientClip := gorgonia.WithClip(.05)
solver := gorgonia.NewVanillaSolver(learningRate)
//solver := gorgonia.NewVanillaSolver(learningRate, gradientClip)
// solver := gorgonia.NewVanillaSolver(learningRate, gradientClip)
defer virtualMachine.Close()
for i:=0; i < 10; i++{
for {
err = virtualMachine.RunAll()
if (err != nil) { return err }
// NodesToValueGrads is a utility function that converts a Nodes to a slice of ValueGrad for the solver
valueGrads := gorgonia.NodesToValueGrads(neuralNetworkLearnables)
err := solver.Step(valueGrads)
if (err != nil) { return err }
virtualMachine.Reset()
userStoppedTraining, nextDataExists, trainingDataObject, err := getNextTrainingData()
if (err != nil) { return false, err }
if (userStoppedTraining == true){
// User manually stopped the training run
return false, nil
}
if (nextDataExists == false){
// We are done training
break
}
// We convert our input training data slices to the type *Dense and assign them to our nodes
// This inputLayer contains the allele values for this training example
trainingDataInputLayer := trainingDataObject.InputLayer
// This outputLayer contains the phenotype for this training example (example: Eye color of Blue)
trainingDataOutputLayer := trainingDataObject.OutputLayer
inputTensorShapeObject := tensor.WithShape(1, layer1NeuronCount)
outputTensorShapeObject := tensor.WithShape(1, layer4NeuronCount)
inputTensorBacking := tensor.WithBacking(trainingDataInputLayer)
outputTensorBacking := tensor.WithBacking(trainingDataOutputLayer)
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
outputTensor := tensor.New(outputTensorShapeObject, outputTensorBacking)
err = gorgonia.Let(trainingDataInputNode, inputTensor)
if (err != nil) { return false, err }
err = gorgonia.Let(trainingDataExpectedOutputNode, outputTensor)
if (err != nil) { return false, err }
// for i:=0; i < 10; i++{
err = virtualMachine.RunAll()
if (err != nil) { return false, err }
// NodesToValueGrads is a utility function that converts a Nodes to a slice of ValueGrad for the solver
valueGrads := gorgonia.NodesToValueGrads(neuralNetworkLearnables)
err = solver.Step(valueGrads)
if (err != nil) { return false, err }
virtualMachine.Reset()
// }
// log.Println(cost.Value())
}
return nil
return true, nil
}
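A sketch of how a caller might drive the new getNextTrainingData callback (illustrative, not part of the diff; trainingDatasList and userRequestedStop are hypothetical names):
trainingDataIndex := 0
getNextTrainingData := func()(bool, bool, TrainingData, error){
	if (userRequestedStop == true){
		return true, false, TrainingData{}, nil
	}
	if (trainingDataIndex >= len(trainingDatasList)){
		return false, false, TrainingData{}, nil
	}
	nextTrainingData := trainingDatasList[trainingDataIndex]
	trainingDataIndex += 1
	return false, true, nextTrainingData, nil
}
processCompleted, err := TrainNeuralNetwork(traitName, neuralNetworkObject, getNextTrainingData)
if (err != nil) { panic(err) }
// processCompleted is false if the user stopped the run partway through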
@@ -726,28 +946,29 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer
neuralNetworkGraph := inputNeuralNetwork.graph
// We convert the inputLayer []float32 to a node object
numberOfInputNeurons := len(inputLayer)
inputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("Input"),
gorgonia.WithShape(1, numberOfInputNeurons),
)
// We convert the inputLayer []float32 to a tensor *Dense object
inputTensorShapeObject := tensor.WithShape(1, numberOfInputNeurons)
inputTensorBacking := tensor.WithBacking(inputLayer)
inputTensor := tensor.New(inputTensorShapeObject, inputTensorBacking)
inputNode := gorgonia.NewMatrix(neuralNetworkGraph,
tensor.Float32,
gorgonia.WithName("input"),
gorgonia.WithShape(1, numberOfInputNeurons),
gorgonia.WithValue(inputTensor),
)
err := gorgonia.Let(inputNode, inputTensor)
if (err != nil) { return nil, err }
err := inputNeuralNetwork.prepareToComputePrediction(inputNode)
err = inputNeuralNetwork.buildNeuralNetwork(inputNode)
if (err != nil){ return nil, err }
prediction := inputNeuralNetwork.prediction
// Now we create a virtual machine to compute the prediction
neuralNetworkLearnables := inputNeuralNetwork.getLearnables()
@@ -759,26 +980,25 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer
err = virtualMachine.RunAll()
if (err != nil) { return nil, err }
prediction := inputNeuralNetwork.prediction
predictionValues := prediction.Value().Data().([]float32)
return predictionValues, nil
}
// This function will take a neural network and input layer and prepare the network to compute a prediction
// We still need to run a virtual machine after calling this function in order for the prediction to be generated
func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgonia.Node)error{
// This function will take a neural network and input layer and build the network to be able to compute a prediction
// We need to run a virtual machine after calling this function in order for the prediction to be generated
func (inputNetwork *NeuralNetwork)buildNeuralNetwork(inputLayer *gorgonia.Node)error{
// We copy pointer (says to do this in a resource i'm reading)
// We copy node pointer (says to do this in a resource i'm reading)
inputLayerCopy := inputLayer
// We multiply weights at each layer and perform rectification (ReLU) after each multiplication
// We multiply weights at each layer and perform rectification (ReLU) after each multiplication, then SoftMax the output
weights1 := inputNetwork.weights1
weights2 := inputNetwork.weights2
weights3 := inputNetwork.weights3
weights4 := inputNetwork.weights4
layer1Product, err := gorgonia.Mul(inputLayerCopy, weights1)
if (err != nil) {
@@ -787,9 +1007,11 @@ func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgoni
layer1ProductRectified, err := gorgonia.Rectify(layer1Product)
if (err != nil){
return errors.New("Layer 1 rectification failed: " + err.Error())
return errors.New("Layer 1 Rectify failed: " + err.Error())
}
weights2 := inputNetwork.weights2
layer2Product, err := gorgonia.Mul(layer1ProductRectified, weights2)
if (err != nil) {
return errors.New("Layer 2 multiplication failed: " + err.Error())
@@ -797,35 +1019,21 @@ func (inputNetwork *NeuralNetwork)prepareToComputePrediction(inputLayer *gorgoni
layer2ProductRectified, err := gorgonia.Rectify(layer2Product)
if (err != nil){
return errors.New("Layer 2 rectification failed: " + err.Error())
return errors.New("Layer 2 Rectify failed: " + err.Error())
}
weights3 := inputNetwork.weights3
layer3Product, err := gorgonia.Mul(layer2ProductRectified, weights3)
if (err != nil) {
return errors.New("Layer 3 multiplication failed: " + err.Error())
}
layer3ProductRectified, err := gorgonia.Rectify(layer3Product)
if (err != nil){
return errors.New("Layer 3 rectification failed: " + err.Error())
}
layer4Product, err := gorgonia.Mul(layer3ProductRectified, weights4)
if (err != nil) {
return errors.New("Layer 4 multiplication failed: " + err.Error())
}
layer4ProductRectified, err := gorgonia.Rectify(layer4Product)
if (err != nil){
return errors.New("Layer 4 rectification failed: " + err.Error())
}
// We sigmoid the output to get the prediction
//TODO: Use SoftMax instead?
prediction, err := gorgonia.Sigmoid(layer4ProductRectified)
if (err != nil) {
return errors.New("Sigmoid failed: " + err.Error())
}
// We SoftMax the output to get the prediction
prediction, err := gorgonia.SoftMax(layer3Product)
if (err != nil) {
return errors.New("SoftMax failed: " + err.Error())
}
inputNetwork.prediction = prediction
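In summary (an editorial note, not part of the diff), this file now builds a three-weight-matrix perceptron. Assuming the eye-color sizes returned by getNeuralNetworkLayerSizes (282, 100, 50, 4), the forward pass is, schematically:
// input:                                   1 x 282
// hidden1 = ReLU(input * Weights1)         // Weights1: 282 x 100
// hidden2 = ReLU(hidden1 * Weights2)       // Weights2: 100 x 50
// prediction = SoftMax(hidden2 * Weights3) // Weights3: 50 x 4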

View file

@@ -120,8 +120,20 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi
locusAllele1 := locusBasePairValue.Allele1
locusAllele2 := locusBasePairValue.Allele2
getLocusIsPhasedBool := func()bool{
if (locusAllele1 == locusAllele2){
// Locus has to be phased, because phase flip does not change value
return true
}
return genomeIsPhased
}
locusIsPhased := getLocusIsPhasedBool()
locusValueObject := locusValue.LocusValue{
LocusIsPhased: genomeIsPhased,
LocusIsPhased: locusIsPhased,
Base1Value: locusAllele1,
Base2Value: locusAllele2,
}
@@ -466,9 +478,20 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi
// The OnlyExcludeConflicts rule will only omit a locus when there is a tie
// The OnlyIncludeShared rule requires at least 2 genomes to agree
getLocusIsPhased_OnlyExcludeConflicts := func()bool{
if (locusBase1 == locusBase2){
// These kinds of loci are always phased, because swapping the alleles changes nothing.
return true
}
return phaseIsKnown_OnlyExcludeConflicts
}
locusIsPhased_OnlyExcludeConflicts := getLocusIsPhased_OnlyExcludeConflicts()
onlyExcludeConflictsLocusValue := locusValue.LocusValue{
LocusIsPhased: phaseIsKnown_OnlyExcludeConflicts,
LocusIsPhased: locusIsPhased_OnlyExcludeConflicts,
Base1Value: locusBase1,
Base2Value: locusBase2,
}
@@ -477,8 +500,19 @@ func GetGenomesWithMetadataListFromRawGenomesList(inputGenomesList []RawGenomeWi
if (mostRecordedSortedBasePairCount >= 2){
getLocusIsPhased_OnlyIncludeShared := func()bool{
if (locusBase1 == locusBase2){
// These kinds of loci are always phased, because swapping the alleles changes nothing.
return true
}
return phaseIsKnown_OnlyIncludeShared
}
locusIsPhased_OnlyIncludeShared := getLocusIsPhased_OnlyIncludeShared()
onlyIncludeSharedLocusValue := locusValue.LocusValue{
LocusIsPhased: phaseIsKnown_OnlyIncludeShared,
LocusIsPhased: locusIsPhased_OnlyIncludeShared,
Base1Value: locusBase1,
Base2Value: locusBase2,
}
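To illustrate the two consensus rules named above (an illustrative example, not part of the diff):
// Suppose three raw genomes report the same locus:
// -Genome A: C/T    -Genome B: C/T    -Genome C: C/G
//
// OnlyExcludeConflicts keeps C/T (2 votes to 1); a locus is only omitted when the top values tie.
// OnlyIncludeShared also keeps C/T, because at least 2 genomes agree on it.
//
// If only Genome A (C/T) and Genome C (C/G) existed, the vote would tie 1-1:
// OnlyExcludeConflicts would omit the locus, and OnlyIncludeShared would omit it too (no value has 2 votes).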

View file

@@ -246,6 +246,13 @@ func CeilFloat64ToInt(input float64)(int, error){
return ceiledInt, nil
}
func ConvertFloat32ToString(input float32) string{
result := strconv.FormatFloat(float64(input), 'f', 5, 32)
return result
}
func ConvertFloat64ToString(input float64) string{
result := strconv.FormatFloat(input, 'f', 5, 64)

View file

@@ -125,7 +125,7 @@ func getEyeColorTraitObject()Trait{
TraitDescription: "The color of a person's eyes.",
LociList: eyeColorLociList,
RulesList: []TraitRule{},
OutcomesList: []string{},
OutcomesList: []string{"Blue", "Green", "Hazel", "Brown"},
References: referencesMap,
}

File diff suppressed because it is too large