// seekia/utilities/createGeneticModels/createGeneticModels.go
// 2574 lines, 89 KiB, Go

// createGeneticModels.go provides an interface to create genetic prediction models
// These are neural networks which predict attributes such as eye color and autism from raw genome files
// The OpenSNP.org dataset is used, and more datasets will be added in the future.
// You must download the dataset and extract it. The instructions are described in the utility.
// The trained models are saved in the /resources/trainedPredictionModels package for use in the Seekia app.
package main
import "fyne.io/fyne/v2"
import "fyne.io/fyne/v2/app"
import "fyne.io/fyne/v2/widget"
import "fyne.io/fyne/v2/container"
import "fyne.io/fyne/v2/theme"
import "fyne.io/fyne/v2/layout"
import "fyne.io/fyne/v2/dialog"
import "fyne.io/fyne/v2/data/binding"
import "seekia/resources/geneticReferences/polygenicDiseases"
import "seekia/resources/geneticReferences/traits"
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/resources/trainedPredictionModels"
import "seekia/internal/encoding"
import "seekia/internal/genetics/locusValue"
import "seekia/internal/genetics/prepareRawGenomes"
import "seekia/internal/genetics/readRawGenomes"
import "seekia/internal/genetics/geneticPrediction"
import "seekia/internal/genetics/geneticPredictionModels"
import "seekia/internal/globalSettings"
import "seekia/internal/helpers"
import "seekia/internal/imagery"
import "seekia/internal/localFilesystem"
import "seekia/internal/genetics/readBiobankData"
import "errors"
import "crypto/sha256"
import "bytes"
import "image/color"
import "io"
import "os"
import "strings"
import "sync"
import "slices"
import "math"
import mathRand "math/rand/v2"
import goFilepath "path/filepath"
import "time"
// main initializes the genetic reference variables and the global settings datastore,
// then creates the utility's window and displays the home page.
// Any initialization failure is fatal, so we panic with the error.
func main() {
	err := polygenicDiseases.InitializePolygenicDiseaseVariables()
	if err != nil {
		panic(err)
	}
	err = traits.InitializeTraitVariables()
	if err != nil {
		panic(err)
	}
	err = globalSettings.InitializeGlobalSettingsDatastore()
	if err != nil {
		panic(err)
	}

	// We do not name this variable "app", because that would shadow the imported fyne app package
	fyneApp := app.New()
	customTheme := getCustomFyneTheme()
	fyneApp.Settings().SetTheme(customTheme)

	window := fyneApp.NewWindow("Seekia - Create Genetic Models Utility")
	windowSize := fyne.NewSize(600, 600)
	window.Resize(windowSize)
	window.CenterOnScreen()

	setHomePage(window)
	window.ShowAndRun()
}
// getWidgetCentered returns a horizontal box which contains the provided widget
// with a spacer on its left and a spacer on its right.
func getWidgetCentered(widget fyne.Widget) *fyne.Container {
	return container.NewHBox(layout.NewSpacer(), widget, layout.NewSpacer())
}
// getLabelCentered creates a label from the provided text and returns it
// inside a horizontal box, with a spacer on either side of the label.
func getLabelCentered(text string) *fyne.Container {
	leftSpacer := layout.NewSpacer()
	rightSpacer := layout.NewSpacer()
	return container.NewHBox(leftSpacer, widget.NewLabel(text), rightSpacer)
}
// getBoldLabel returns a center-aligned label which displays the provided text in bold.
func getBoldLabel(text string) fyne.Widget {
	boldStyle := fyne.TextStyle{Bold: true}
	return widget.NewLabelWithStyle(text, fyne.TextAlignCenter, boldStyle)
}
// getItalicLabel returns a center-aligned label which displays the provided text in italics.
func getItalicLabel(text string) fyne.Widget {
	italicStyle := fyne.TextStyle{Italic: true}
	return widget.NewLabelWithStyle(text, fyne.TextAlignCenter, italicStyle)
}
// getBoldLabelCentered returns a bold label for the provided text,
// placed in a horizontal box between two spacers.
func getBoldLabelCentered(inputText string) *fyne.Container {
	return container.NewHBox(layout.NewSpacer(), getBoldLabel(inputText), layout.NewSpacer())
}
// getItalicLabelCentered returns an italic label for the provided text,
// placed in a horizontal box between two spacers.
func getItalicLabelCentered(inputText string) *fyne.Container {
	return container.NewHBox(layout.NewSpacer(), getItalicLabel(inputText), layout.NewSpacer())
}
// showUnderConstructionDialog shows a dialog on the provided window
// which tells the user that the page/feature has not been built yet.
func showUnderConstructionDialog(window fyne.Window) {
	messageLines := container.NewVBox(
		getLabelCentered("Seekia is under construction."),
		getLabelCentered("This page/feature needs to be built."),
	)
	dialog.ShowCustom("Under Construction", "Close", messageLines, window)
}
// getBackButtonCentered returns a centered "Go Back" button.
// The previousPage function is called when the button is pressed.
func getBackButtonCentered(previousPage func()) *fyne.Container {
	goBackButton := widget.NewButtonWithIcon("Go Back", theme.NavigateBackIcon(), previousPage)
	return getWidgetCentered(goBackButton)
}
// setErrorEncounteredPage replaces the window's content with a page which displays the provided error.
// Inputs:
//   -fyne.Window: The window whose content will be replaced
//   -error: The error to display. If nil, a placeholder message is shown instead.
//   -func(): The function which the Go Back button calls
func setErrorEncounteredPage(window fyne.Window, err error, previousPage func()) {
	title := getBoldLabelCentered("Error Encountered")
	backButton := getBackButtonCentered(previousPage)
	description1 := getLabelCentered("Something went wrong. Report this error to Seekia developers.")
	header := container.NewVBox(title, backButton, widget.NewSeparator(), description1, widget.NewSeparator())

	// A nil error should never be provided. We show a message describing the mistake rather than crashing.
	errorString := "No nav bar error encountered page called with nil error."
	if err != nil {
		errorString = err.Error()
	}

	errorLabel := widget.NewLabel(errorString)
	// We use the named constants instead of their raw integer values (3 and 1)
	errorLabel.Wrapping = fyne.TextWrapWord
	errorLabel.Alignment = fyne.TextAlignCenter
	errorLabel.TextStyle = fyne.TextStyle{
		Bold: true,
		Italic: false,
		Monospace: false,
	}

	//TODO: Add copyable toggle

	page := container.NewBorder(header, nil, nil, nil, errorLabel)
	window.SetContent(page)
}
// setLoadingScreen replaces the window's content with a centered title, loading text and infinite progress bar.
// No progress is shown on this screen, so it should only be used when retrieving progress is impossible
func setLoadingScreen(window fyne.Window, pageTitle string, loadingText string) {
	loadingTextLabel := getItalicLabel(loadingText)
	infiniteProgressBar := widget.NewProgressBarInfinite()
	screenContent := container.NewVBox(
		getBoldLabelCentered(pageTitle),
		getWidgetCentered(loadingTextLabel),
		getWidgetCentered(infiniteProgressBar),
	)
	window.SetContent(container.NewCenter(screenContent))
}
// setHomePage replaces the window's content with the home page.
// The home page describes the utility and has one button for each of the 5 steps.
// Each button navigates to that step's page, which is able to return to the home page.
func setHomePage(window fyne.Window) {
	returnToHomePage := func() { setHomePage(window) }

	type stepDetails struct {
		labelText  string
		buttonText string
		openPage   func(fyne.Window, func())
	}

	stepsList := []stepDetails{
		{"Step 1:", "Download Training Data", setDownloadTrainingDataPage},
		{"Step 2:", "Extract Training Data", setExtractTrainingDataPage},
		{"Step 3:", "Create Training Data", setCreateTrainingDataPage},
		{"Step 4:", "Train Models", setTrainModelsPage},
		{"Step 5:", "Test Models", setTestModelsPage},
	}

	pageItems := []fyne.CanvasObject{
		getBoldLabelCentered("Create Genetic Models Utility"),
		widget.NewSeparator(),
		getLabelCentered("This utility is used to create the genetic prediction models."),
		getLabelCentered("These models are used to predict attributes such as eye color and autism from raw genome files."),
		getLabelCentered("Seekia aims to have open source and reproducible genetic prediction technology."),
	}

	// Each step is preceded by a separator, followed by the step's label and its button
	for _, step := range stepsList {
		openStepPage := step.openPage
		stepButton := widget.NewButton(step.buttonText, func() {
			openStepPage(window, returnToHomePage)
		})
		pageItems = append(pageItems, widget.NewSeparator(), getLabelCentered(step.labelText), getWidgetCentered(stepButton))
	}

	//TODO: A page to verify the checksums of the generated .gob models

	window.SetContent(container.NewVBox(pageItems...))
}
// setDownloadTrainingDataPage replaces the window's content with the page which describes how to download the OpenSNP data archive.
// The page shows the archive's IPFS content identifier, file name, checksum and sizes.
// It also has a button to select a downloaded file and compare its SHA256 checksum against the expected checksum.
// Inputs:
//   -fyne.Window: The window whose content will be replaced
//   -func(): The function which the Go Back button calls
func setDownloadTrainingDataPage(window fyne.Window, previousPage func()) {
	currentPage := func() { setDownloadTrainingDataPage(window, previousPage) }

	// These values are each used in more than one place on this page
	const ipfsContentIdentifier = "Qme64v7Go941s3psokZ7aDngQR6Tdv55jDhUDdLZXsRiRh"
	const expectedFileHashHex = "49f84fb71cb12df718a80c1ce25f6370ba758cbee8f24bd8a6d4f0da2e3c51ee"

	title := getBoldLabelCentered("Download Training Data")
	backButton := getBackButtonCentered(previousPage)

	description1 := getLabelCentered("You must download the OpenSNP.org data dump file.")
	description2 := getLabelCentered("This is a .tar.gz file which was created in August of 2023.")
	description3 := getLabelCentered("It will be hosted on IPFS, a decentralized data sharing network.")
	description4 := getLabelCentered("You must use an IPFS client to download the file.")
	description5 := getLabelCentered("You can also download it via a torrent or web server if someone shares it elsewhere.")

	currentClipboard := window.Clipboard()

	ipfsIdentifierTitle := getLabelCentered("IPFS Content Identifier:")
	ipfsIdentifierLabel := getBoldLabelCentered(ipfsContentIdentifier)
	ipfsIdentifierCopyToClipboardButton := getWidgetCentered(widget.NewButtonWithIcon("Copy", theme.ContentCopyIcon(), func() {
		currentClipboard.SetContent(ipfsContentIdentifier)
	}))

	fileNameTitle := getLabelCentered("File Name:")
	fileNameLabel := getBoldLabelCentered("OpenSNPDataArchive.tar.gz")

	fileHashTitle := getLabelCentered("File SHA256 Checksum Hash:")
	fileHashLabel := getBoldLabelCentered(expectedFileHashHex)

	fileSizeTitle := getLabelCentered("File Size:")
	// NOTE(review): 48,961,240 bytes is roughly 49 MB, which does not agree with the 50.1 GB in parentheses.
	// The byte count should be confirmed against the real archive before this label is changed.
	fileSizeLabel := getBoldLabelCentered("48,961,240 bytes (50.1 GB)")

	fileExtractedSizeTitle := getLabelCentered("File Extracted Size:")
	fileExtractedSizeLabel := getBoldLabelCentered("128,533,341,751 bytes (119.7 GB)")

	verifyFileTitle := getBoldLabelCentered("Verify File")
	verifyFileDescription1 := getLabelCentered("You can use the Seekia client to verify your downloaded file.")
	verifyFileDescription2 := getLabelCentered("Press the button below and select your file.")
	verifyFileDescription3 := getLabelCentered("This will take a while, because the file contents must be hashed.")

	// This function is called after the user selects a file (or closes the file selection dialog)
	selectFileCallbackFunction := func(fyneFileObject fyne.URIReadCloser, err error) {
		if err != nil {
			setErrorEncounteredPage(window, err, currentPage)
			return
		}
		if fyneFileObject == nil {
			// User did not select a file
			return
		}
		// The dialog hands us an open reader. We must close it once we are done.
		defer fyneFileObject.Close()

		setLoadingScreen(window, "Hashing File", "Calculating file hash...")

		//TODO: Use Blake3 instead of sha256 for faster hashing

		// We hash the contents of the provided reader directly.
		// This avoids converting the URI into a filesystem path and opening the file a second time.
		hasher := sha256.New()
		_, err = io.Copy(hasher, fyneFileObject)
		if err != nil {
			setErrorEncounteredPage(window, err, currentPage)
			return
		}
		hashResultBytes := hasher.Sum(nil)

		expectedResultBytes, err := encoding.DecodeHexStringToBytes(expectedFileHashHex)
		if err != nil {
			setErrorEncounteredPage(window, err, currentPage)
			return
		}

		// We return to this page and show the result in a dialog
		currentPage()

		bytesAreEqual := bytes.Equal(hashResultBytes, expectedResultBytes)
		if bytesAreEqual == false {
			title := "File Is Invalid"
			dialogMessage1 := getLabelCentered("The file you downloaded is not valid.")
			dialogMessage2 := getLabelCentered("The SHA256 Checksum does not match the expected checksum.")
			dialogContent := container.NewVBox(dialogMessage1, dialogMessage2)
			dialog.ShowCustom(title, "Close", dialogContent, window)
			return
		}

		title := "File Is Valid"
		dialogMessage1 := getLabelCentered("The file you downloaded is valid!")
		dialogMessage2 := getLabelCentered("The SHA256 Checksum matches the expected checksum.")
		dialogContent := container.NewVBox(dialogMessage1, dialogMessage2)
		dialog.ShowCustom(title, "Close", dialogContent, window)
	}

	verifyFileButton := getWidgetCentered(widget.NewButton("Verify File", func() {
		dialog.ShowFileOpen(selectFileCallbackFunction, window)
	}))

	page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2, description3, description4, description5, widget.NewSeparator(), ipfsIdentifierTitle, ipfsIdentifierLabel, ipfsIdentifierCopyToClipboardButton, widget.NewSeparator(), fileNameTitle, fileNameLabel, widget.NewSeparator(), fileHashTitle, fileHashLabel, widget.NewSeparator(), fileSizeTitle, fileSizeLabel, widget.NewSeparator(), fileExtractedSizeTitle, fileExtractedSizeLabel, widget.NewSeparator(), verifyFileTitle, verifyFileDescription1, verifyFileDescription2, verifyFileDescription3, verifyFileButton)

	scrollablePage := container.NewVScroll(page)
	window.SetContent(scrollablePage)
}
// setExtractTrainingDataPage replaces the window's content with the page used to select the extracted OpenSNP archive folder.
// The selected folderpath is saved to ./OpenSNPDataArchiveFolderpath.txt, and the saved folderpath (if any) is displayed.
// Inputs:
//   -fyne.Window: The window whose content will be replaced
//   -func(): The function which the Go Back button calls
func setExtractTrainingDataPage(window fyne.Window, previousPage func()) {
	reloadPage := func() { setExtractTrainingDataPage(window, previousPage) }

	// We read the currently saved folderpath, if one exists
	savedPathExists, savedPathBytes, err := localFilesystem.GetFileContents("./OpenSNPDataArchiveFolderpath.txt")
	if err != nil {
		setErrorEncounteredPage(window, err, previousPage)
		return
	}

	var currentLocationLabel *fyne.Container
	if savedPathExists {
		currentLocationLabel = getBoldLabelCentered(string(savedPathBytes))
	} else {
		currentLocationLabel = getItalicLabelCentered("None")
	}

	// This shows a dialog describing a failure, and leaves the current page in place
	showFailureDialog := func(dialogTitle string, failure error) {
		failureMessage := getLabelCentered("Report this error to Seekia developers: " + failure.Error())
		dialog.ShowCustom(dialogTitle, "Close", container.NewVBox(failureMessage), window)
	}

	// This is called after the user selects a folder (or closes the folder selection dialog)
	onFolderSelected := func(folderObject fyne.ListableURI, err error) {
		if err != nil {
			showFailureDialog("Failed to open folder.", err)
			return
		}
		if folderObject == nil {
			// No folder was selected
			return
		}

		selectedFolderpathBytes := []byte(folderObject.Path())
		err = localFilesystem.CreateOrOverwriteFile(selectedFolderpathBytes, "./", "OpenSNPDataArchiveFolderpath.txt")
		if err != nil {
			showFailureDialog("Failed to save file.", err)
			return
		}

		// We reload the page so the new folderpath is displayed
		reloadPage()
	}

	selectFolderButton := widget.NewButtonWithIcon("Select Folder Location", theme.FolderIcon(), func() {
		dialog.ShowFolderOpen(onFolderSelected, window)
	})

	pageContent := container.NewVBox(
		getBoldLabelCentered("Extract Training Data"),
		getBackButtonCentered(previousPage),
		widget.NewSeparator(),
		getLabelCentered("You must extract the downloaded OpenSNPDataArchive.tar.gz to a folder."),
		getLabelCentered("Once you have extracted the file, select the extracted folder using the page below."),
		widget.NewSeparator(),
		getLabelCentered("Current Folder Location:"),
		currentLocationLabel,
		widget.NewSeparator(),
		getWidgetCentered(selectFolderButton),
	)

	window.SetContent(container.NewVScroll(pageContent))
}
// setCreateTrainingDataPage replaces the window's content with the page which describes the create training data step.
// Pressing the begin button navigates to the page which starts and monitors the process.
// Inputs:
//   -fyne.Window: The window whose content will be replaced
//   -func(): The function which the Go Back button calls
func setCreateTrainingDataPage(window fyne.Window, previousPage func()) {
	reloadPage := func() { setCreateTrainingDataPage(window, previousPage) }

	beginButton := widget.NewButtonWithIcon("Begin Creating Data", theme.MediaPlayIcon(), func() {
		setStartAndMonitorCreateTrainingDataPage(window, reloadPage)
	})

	pageContent := container.NewVBox(
		getBoldLabelCentered("Create Training Data"),
		getBackButtonCentered(previousPage),
		widget.NewSeparator(),
		getLabelCentered("Press the button below to begin creating the training data."),
		getLabelCentered("This will prepare each user's genome into a file to use to train each neural network."),
		getLabelCentered("This will take a while."),
		getWidgetCentered(beginButton),
	)

	window.SetContent(pageContent)
}
// setStartAndMonitorCreateTrainingDataPage starts the process which creates the training data, and shows a page to monitor it.
// The folderpath of the extracted OpenSNP archive is read from ./OpenSNPDataArchiveFolderpath.txt
// Two goroutines are started:
//   -One creates the training data and updates the progress bar and progress details label
//   -One updates the estimated time remaining label once per second
// The training data files are written into ./TrainingData/<AttributeNameWithoutWhitespace>/
// Pressing the cancel button signals both goroutines to stop and calls previousPage.
// Inputs:
//   -fyne.Window: The window whose content will be replaced
//   -func(): The function called when the user cancels, or presses Go Back/Exit on a resulting page
func setStartAndMonitorCreateTrainingDataPage(window fyne.Window, previousPage func()){

	err := locusMetadata.InitializeLocusMetadataVariables()
	if (err != nil){
		setErrorEncounteredPage(window, err, previousPage)
		return
	}

	title := getBoldLabelCentered("Creating Training Data")

	fileExists, fileContents, err := localFilesystem.GetFileContents("./OpenSNPDataArchiveFolderpath.txt")
	if (err != nil) {
		setErrorEncounteredPage(window, err, previousPage)
		return
	}
	if (fileExists == false){
		backButton := getBackButtonCentered(previousPage)
		description1 := getBoldLabelCentered("You have not selected your OpenSNP data archive folderpath.")
		description2 := getLabelCentered("Go back to step 2 and follow the instructions.")
		page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2)
		window.SetContent(page)
		return
	}

	dataArchiveFolderpath := string(fileContents)

	progressDetailsBinding := binding.NewString()
	estimatedTimeRemainingBinding := binding.NewString()
	progressPercentageBinding := binding.NewFloat()

	loadingBar := getWidgetCentered(widget.NewProgressBarWithData(progressPercentageBinding))

	progressDetailsTitle := getBoldLabelCentered("Progress Details:")

	progressDetailsLabel := widget.NewLabelWithData(progressDetailsBinding)
	progressDetailsLabel.TextStyle = fyne.TextStyle{
		Bold: false,
		Italic: true,
		Monospace: false,
	}
	progressDetailsLabelCentered := getWidgetCentered(progressDetailsLabel)

	estimatedTimeRemainingLabel := widget.NewLabelWithData(estimatedTimeRemainingBinding)
	estimatedTimeRemainingLabel.TextStyle = fyne.TextStyle{
		Bold: false,
		Italic: true,
		Monospace: false,
	}
	estimatedTimeRemainingLabelCentered := getWidgetCentered(estimatedTimeRemainingLabel)

	// We set this bool to true to stop the createData process
	var createDataIsStoppedBoolMutex sync.RWMutex
	createDataIsStoppedBool := false

	// This function signals both goroutines that the process has ended
	stopCreateDataProcess := func(){
		createDataIsStoppedBoolMutex.Lock()
		createDataIsStoppedBool = true
		createDataIsStoppedBoolMutex.Unlock()
	}

	cancelButton := getWidgetCentered(widget.NewButtonWithIcon("Cancel", theme.CancelIcon(), func(){
		stopCreateDataProcess()
		previousPage()
	}))

	page := container.NewVBox(title, widget.NewSeparator(), loadingBar, progressDetailsTitle, progressDetailsLabelCentered, estimatedTimeRemainingLabelCentered, widget.NewSeparator(), cancelButton)

	window.SetContent(page)

	createTrainingDataFunction := func(){

		var processProgressMutex sync.RWMutex

		// This stores the progress of creating the training data (0-1)
		processProgress := float64(0)

		startUpdateTimeRemainingDisplayFunction := func(){

			// This function updates the estimated time remaining label binding
			// It loops once per second until createDataIsStoppedBool is true
			updateTimeRemainingDisplayFunction := func()error{

				startTime := time.Now().Unix()

				for{
					createDataIsStoppedBoolMutex.RLock()
					createDataIsStopped := createDataIsStoppedBool
					createDataIsStoppedBoolMutex.RUnlock()
					if (createDataIsStopped == true){
						// User exited the process/Process has completed
						return nil
					}

					processProgressMutex.RLock()
					currentProcessProgress := processProgress
					processProgressMutex.RUnlock()

					if (currentProcessProgress == 0){
						// We cannot estimate the required time yet (this also avoids dividing by 0)
						estimatedTimeRemainingBinding.Set("Calculating required time...")
						time.Sleep(time.Second)
						continue
					}

					// We calculate how long we think it will take for the process to complete

					currentTime := time.Now().Unix()
					secondsElapsed := currentTime - startTime

					// processProgress is a float64 which stores the progress as a value between 0-1
					// To get the estimated total time the process will take, we divide the seconds elapsed by the proportion of progress
					// For example:
					// 0.1 (10%) at 10 seconds == Total process will take 100 seconds
					// 0.5 (50%) at 20 seconds == Total process will take 40 seconds

					totalSeconds := float64(secondsElapsed)/currentProcessProgress

					estimatedSecondsRemaining := int64(totalSeconds) - secondsElapsed

					estimatedTimeRemainingTranslated, err := helpers.ConvertUnixTimeDurationToUnitsTimeTranslated(estimatedSecondsRemaining, false)
					if (err != nil) { return err }

					estimatedTimeRemainingBinding.Set("Estimated Time Remaining: " + estimatedTimeRemainingTranslated)

					time.Sleep(time.Second)
				}

				// This should never be reached
				return errors.New("updateTimeRemainingDisplayFunction loop has broken.")
			}

			err := updateTimeRemainingDisplayFunction()
			if (err != nil){
				stopCreateDataProcess()
				setErrorEncounteredPage(window, err, previousPage)
				return
			}
		}

		go startUpdateTimeRemainingDisplayFunction()

		//Outputs:
		// -bool: Process completed (true == was not stopped mid-way)
		// -bool: Data archive is well formed
		// -error
		createTrainingData := func()(bool, bool, error){

			phenotypesFilepath := goFilepath.Join(dataArchiveFolderpath, "OpenSNPData", "phenotypes_202308230100.csv")

			fileObject, err := os.Open(phenotypesFilepath)
			if (err != nil){
				fileDoesNotExist := os.IsNotExist(err)
				if (fileDoesNotExist == true){
					// Archive is corrupt
					return true, false, nil
				}
				return false, false, err
			}
			defer fileObject.Close()

			fileIsWellFormed, userPhenotypesList_OpenSNP := readBiobankData.ReadOpenSNPPhenotypesFile(fileObject)
			if (fileIsWellFormed == false){
				// Archive is corrupt
				return true, false, nil
			}

			// This is the folderpath for the folder which contains all of the user raw genomes
			openSNPRawGenomesFolderpath := goFilepath.Join(dataArchiveFolderpath, "OpenSNPData")

			filesList, err := os.ReadDir(openSNPRawGenomesFolderpath)
			if (err != nil) { return false, false, err }

			// Map Structure: User ID -> List of user raw genome filepaths
			userRawGenomeFilepathsMap := make(map[int][]string)

			for _, filesystemObject := range filesList{

				filepathIsFolder := filesystemObject.IsDir()
				if (filepathIsFolder == true){
					// Archive is corrupt
					return true, false, nil
				}

				fileName := filesystemObject.Name()

				// Example of a readable raw genome filename: "user1_file9_yearofbirth_1985_sex_XY.23andme.txt"

				userIDWithRawGenomeInfo, fileIsUserGenome := strings.CutPrefix(fileName, "user")
				if (fileIsUserGenome == false){
					// File is not a user genome, skip it.
					continue
				}

				userIDString, rawGenomeInfo, separatorFound := strings.Cut(userIDWithRawGenomeInfo, "_")
				if (separatorFound == false){
					// Archive is corrupt
					return true, false, nil
				}

				userID, err := helpers.ConvertStringToInt(userIDString)
				if (err != nil){
					// Archive is corrupt
					return true, false, nil
				}

				// We determine if the file is readable from the filename's suffix
				getFileIsReadableStatus := func()bool{

					is23andMe := strings.HasSuffix(rawGenomeInfo, ".23andme.txt")
					if (is23andMe == true){
						// We can read this file
						return true
					}
					isAncestry := strings.HasSuffix(rawGenomeInfo, ".ancestry.txt")
					if (isAncestry == true){
						// We can read this file
						return true
					}

					// We cannot read this raw genome file
					//TODO: Add ability to read more raw genome files
					return false
				}

				fileIsReadable := getFileIsReadableStatus()
				if (fileIsReadable == true){

					rawGenomeFilepath := goFilepath.Join(openSNPRawGenomesFolderpath, fileName)

					existingList, exists := userRawGenomeFilepathsMap[userID]
					if (exists == false){
						userRawGenomeFilepathsMap[userID] = []string{rawGenomeFilepath}
					} else {
						existingList = append(existingList, rawGenomeFilepath)
						userRawGenomeFilepathsMap[userID] = existingList
					}
				}
			}

			// We create the folder to store the training data

			_, err = localFilesystem.CreateFolder("./TrainingData")
			if (err != nil) { return false, false, err }

			//TODO: Add more attributes
			attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"}

			// We create the folders for each attribute's training data
			// Any existing contents of each folder are deleted

			for _, attributeName := range attributeNamesList{

				attributeNameWithoutWhitespace := strings.ReplaceAll(attributeName, " ", "")

				folderpath := goFilepath.Join("./TrainingData/", attributeNameWithoutWhitespace)

				_, err = localFilesystem.CreateFolder(folderpath)
				if (err != nil) { return false, false, err }

				folderExists, err := localFilesystem.DeleteAllFolderContents(folderpath)
				if (err != nil) { return false, false, err }
				if (folderExists == false){
					return false, false, errors.New("CreateFolder failed to create folder.")
				}
			}

			numberOfUserPhenotypeDataObjects := len(userPhenotypesList_OpenSNP)
			maximumIndex := numberOfUserPhenotypeDataObjects-1

			numberOfUsersString := helpers.ConvertIntToString(numberOfUserPhenotypeDataObjects)

			for index, userPhenotypeDataObject := range userPhenotypesList_OpenSNP{

				createDataIsStoppedBoolMutex.RLock()
				createDataIsStopped := createDataIsStoppedBool
				createDataIsStoppedBoolMutex.RUnlock()
				if (createDataIsStopped == true){
					// User exited the process
					return false, false, nil
				}

				userIndexString := helpers.ConvertIntToString(index + 1)

				progressDetailsStatus := "Processing User " + userIndexString + "/" + numberOfUsersString

				err = progressDetailsBinding.Set(progressDetailsStatus)
				if (err != nil) { return false, false, err }

				trainingProgressPercentage, err := helpers.ScaleIntProportionally(true, index, 0, maximumIndex, 0, 100)
				if (err != nil) { return false, false, err }

				trainingProgressFloat64 := float64(trainingProgressPercentage)/100

				err = progressPercentageBinding.Set(trainingProgressFloat64)
				if (err != nil) { return false, false, err }

				// We share the progress with the goroutine which estimates the time remaining
				processProgressMutex.Lock()
				processProgress = trainingProgressFloat64
				processProgressMutex.Unlock()

				userID := userPhenotypeDataObject.UserID

				userRawGenomeFilepathsList, exists := userRawGenomeFilepathsMap[userID]
				if (exists == false){
					// User has no genomes
					continue
				}

				// We read all of the user's raw genomes and combine them into a single genomeMap which excludes conflicting loci values

				userRawGenomesWithMetadataList := make([]prepareRawGenomes.RawGenomeWithMetadata, 0)

				for _, userRawGenomeFilepath := range userRawGenomeFilepathsList{

					//Outputs:
					// -bool: Able to read raw genome file
					// -bool: Genome is phased
					// -map[int64]readRawGenomes.RawGenomeLocusValue
					// -error
					readRawGenomeMap := func()(bool, bool, map[int64]readRawGenomes.RawGenomeLocusValue, error){

						fileObject, err := os.Open(userRawGenomeFilepath)
						if (err != nil) { return false, false, nil, err }
						defer fileObject.Close()

						_, _, _, _, genomeIsPhased, rawGenomeMap, err := readRawGenomes.ReadRawGenomeFile(fileObject)
						if (err != nil) {
							// A file which cannot be read is skipped rather than treated as a fatal error
							//log.Println("Raw genome file is malformed: " + userRawGenomeFilepath + ". Reason: " + err.Error())
							return false, false, nil, nil
						}

						return true, genomeIsPhased, rawGenomeMap, nil
					}

					ableToReadRawGenome, rawGenomeIsPhased, rawGenomeMap, err := readRawGenomeMap()
					if (err != nil){ return false, false, err }
					if (ableToReadRawGenome == false){
						// We cannot read this genome file
						// Many of the genome files are unreadable.
						//TODO: Improve ability to read slightly corrupted genome files
						continue
					}

					newGenomeIdentifier, err := helpers.GetNewRandom16ByteArray()
					if (err != nil) { return false, false, err }

					rawGenomeWithMetadata := prepareRawGenomes.RawGenomeWithMetadata{
						GenomeIdentifier: newGenomeIdentifier,
						GenomeIsPhased: rawGenomeIsPhased,
						RawGenomeMap: rawGenomeMap,
					}

					userRawGenomesWithMetadataList = append(userRawGenomesWithMetadataList, rawGenomeWithMetadata)
				}

				if (len(userRawGenomesWithMetadataList) == 0){
					// None of the user's genome files are readable
					continue
				}

				//Outputs:
				// -bool: Any useful locations exist in any of the user's genomes
				// -map[int64]locusValue.LocusValue
				// -error
				getUserLociValuesMap := func()(bool, map[int64]locusValue.LocusValue, error){

					// We do not display the progress of preparing each user's genomes
					updatePercentageCompleteFunction := func(_ int)error{
						return nil
					}

					anyUsefulLocationsExist, genomesWithMetadataList, _, combinedGenomesExist, onlyExcludeConflictsGenomeIdentifier, _, err := prepareRawGenomes.GetGenomesWithMetadataListFromRawGenomesList(userRawGenomesWithMetadataList, updatePercentageCompleteFunction)
					if (err != nil) { return false, nil, err }
					if (anyUsefulLocationsExist == false){
						// None of the user's genomes have any useful locations
						return false, nil, nil
					}

					if (combinedGenomesExist == false){
						if (len(genomesWithMetadataList) != 1){
							return false, nil, errors.New("GetGenomesWithMetadataListFromRawGenomesList returning non-1 length genomesWithMetadataList when combinedGenomesExist == false")
						}

						// Only 1 genome exists
						genomeWithMetadataObject := genomesWithMetadataList[0]
						genomeMap := genomeWithMetadataObject.GenomeMap
						return true, genomeMap, nil
					}

					// Combined genomes exist. We use the genome whose identifier is onlyExcludeConflictsGenomeIdentifier
					for _, genomeWithMetadataObject := range genomesWithMetadataList{
						genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier
						if (genomeIdentifier == onlyExcludeConflictsGenomeIdentifier){
							genomeMap := genomeWithMetadataObject.GenomeMap
							return true, genomeMap, nil
						}
					}

					return false, nil, errors.New("OnlyExcludeConflicts genome not found from GetGenomesWithMetadataListFromRawGenomesList's returned list.")
				}

				anyUsefulLocationsExist, userLociValuesMap, err := getUserLociValuesMap()
				if (err != nil) { return false, false, err }
				if (anyUsefulLocationsExist == false){
					// None of the user's genome files contain any useful locations
					continue
				}

				for _, attributeName := range attributeNamesList{

					attributeNameWithoutWhitespace := strings.ReplaceAll(attributeName, " ", "")

					trainingDataFolderpath := goFilepath.Join("./TrainingData", attributeNameWithoutWhitespace)

					userDataExists, userTrainingDataList, err := geneticPrediction.CreateGeneticPredictionTrainingData_OpenSNP(attributeName, userPhenotypeDataObject, userLociValuesMap)
					if (err != nil) { return false, false, err }
					if (userDataExists == false){
						// User cannot be used for training
						continue
					}

					// Each training data object is saved to its own file

					for index, trainingData := range userTrainingDataList{

						userTrainingDataBytes, err := geneticPrediction.EncodeTrainingDataObjectToBytes(trainingData)
						if (err != nil) { return false, false, err }

						trainingDataIndexString := helpers.ConvertIntToString(index+1)
						userIDString := helpers.ConvertIntToString(userID)

						trainingDataFilename := "User" + userIDString + "_TrainingData_" + trainingDataIndexString + ".gob"

						err = localFilesystem.CreateOrOverwriteFile(userTrainingDataBytes, trainingDataFolderpath, trainingDataFilename)
						if (err != nil) { return false, false, err }
					}
				}
			}

			return true, true, nil
		}

		processIsComplete, archiveIsWellFormed, err := createTrainingData()

		// The process has ended, regardless of the outcome (success, error, corrupt archive or cancellation)
		// We must always signal the time remaining goroutine to stop, otherwise it would loop forever
		stopCreateDataProcess()

		if (err != nil){
			setErrorEncounteredPage(window, err, previousPage)
			return
		}
		if (processIsComplete == false){
			// User exited the page
			return
		}
		if (archiveIsWellFormed == false){

			title := getBoldLabelCentered("OpenSNP Archive Is Corrupt")

			description1 := getBoldLabelCentered("Your downloaded OpenSNP data archive is corrupt.")
			description2 := getLabelCentered("The extracted folder contents do not match what the archive should contain.")
			description3 := getLabelCentered("You should re-extract the contents of the archive.")

			exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), previousPage))

			page := container.NewVBox(title, widget.NewSeparator(), description1, description2, description3, exitButton)

			window.SetContent(page)
			return
		}

		setCreateTrainingDataIsCompletePage(window)
	}

	go createTrainingDataFunction()
}
// setCreateTrainingDataIsCompletePage replaces the window's content with a page
// which tells the user that creating the training data is complete.
// The Exit button navigates to the home page.
func setCreateTrainingDataIsCompletePage(window fyne.Window) {
	goToHomePage := func() {
		setHomePage(window)
	}
	exitButton := widget.NewButtonWithIcon("Exit", theme.CancelIcon(), goToHomePage)

	pageContent := container.NewVBox(
		getBoldLabelCentered("Creating Data Is Complete"),
		widget.NewSeparator(),
		getLabelCentered("Creating training data is complete!"),
		getLabelCentered("The data have been saved in the TrainingData folder."),
		getWidgetCentered(exitButton),
	)

	window.SetContent(pageContent)
}
// setTrainModelsPage replaces the window's content with the page used to choose which attribute's model to train.
// Pressing the begin button with an attribute selected navigates to the page which starts and monitors the training.
// If no attribute is selected, a dialog is shown instead.
// Inputs:
//   -fyne.Window: The window whose content will be replaced
//   -func(): The function which the Go Back button calls
func setTrainModelsPage(window fyne.Window, previousPage func()) {
	reloadPage := func() { setTrainModelsPage(window, previousPage) }

	trainableAttributeNames := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"}
	attributeNameSelector := widget.NewSelect(trainableAttributeNames, nil)

	onBeginTrainingPressed := func() {
		// The selected index is negative when nothing has been selected
		if attributeNameSelector.SelectedIndex() >= 0 {
			setStartAndMonitorTrainModelPage(window, attributeNameSelector.Selected, reloadPage)
			return
		}

		noSelectionMessage := getLabelCentered("You must select an attribute model to train.")
		dialog.ShowCustom("No Attribute Selected", "Close", container.NewVBox(noSelectionMessage), window)
	}

	beginTrainingButton := widget.NewButtonWithIcon("Begin Training Model", theme.MediaPlayIcon(), onBeginTrainingPressed)

	pageContent := container.NewVBox(
		getBoldLabelCentered("Train Models"),
		getBackButtonCentered(previousPage),
		widget.NewSeparator(),
		getLabelCentered("Press the button below to begin training a genetic model."),
		getLabelCentered("This will train a neural network using the user training data."),
		getLabelCentered("This will take a while."),
		getLabelCentered("You must select a model to train."),
		widget.NewSeparator(),
		getWidgetCentered(attributeNameSelector),
		widget.NewSeparator(),
		getWidgetCentered(beginTrainingButton),
	)

	window.SetContent(pageContent)
}
// setStartAndMonitorTrainModelPage displays a progress page and trains the neural network for attributeName.
//
// Inputs:
//	-fyne.Window: The window whose content is replaced
//	-string: Attribute name ("Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness" or "Obesity")
//	-func(): Page to navigate to if the user cancels (it is also passed to the error page)
//
// Training runs in a background goroutine. A second goroutine refreshes the estimated time remaining once per second.
// Both goroutines watch a shared "stopped" flag. The flag is set when the user cancels, when an error occurs,
// and when training ends, so the time-remaining goroutine always exits.
// On success, the model is written to ./TrainedModels/<AttributeNameWithoutSpaces>Model.gob
func setStartAndMonitorTrainModelPage(window fyne.Window, attributeName string, previousPage func()){

	title := getBoldLabelCentered("Train Model")

	//TODO: Verify TrainingData folder integrity

	progressDetailsBinding := binding.NewString()
	estimatedTimeRemainingBinding := binding.NewString()
	progressPercentageBinding := binding.NewFloat()

	loadingBar := getWidgetCentered(widget.NewProgressBarWithData(progressPercentageBinding))

	progressDetailsTitle := getBoldLabelCentered("Progress Details:")

	progressDetailsLabel := widget.NewLabelWithData(progressDetailsBinding)
	progressDetailsLabel.TextStyle = fyne.TextStyle{
		Bold: false,
		Italic: true,
		Monospace: false,
	}
	progressDetailsLabelCentered := getWidgetCentered(progressDetailsLabel)

	estimatedTimeRemainingLabel := widget.NewLabelWithData(estimatedTimeRemainingBinding)
	estimatedTimeRemainingLabel.TextStyle = fyne.TextStyle{
		Bold: false,
		Italic: true,
		Monospace: false,
	}
	estimatedTimeRemainingLabelCentered := getWidgetCentered(estimatedTimeRemainingLabel)

	// We set this bool to true to stop the trainModel process and the time remaining display updater
	var trainModelIsStoppedBoolMutex sync.RWMutex
	trainModelIsStoppedBool := false

	stopTrainModel := func(){
		trainModelIsStoppedBoolMutex.Lock()
		trainModelIsStoppedBool = true
		trainModelIsStoppedBoolMutex.Unlock()
	}

	getTrainModelIsStopped := func()bool{
		trainModelIsStoppedBoolMutex.RLock()
		isStopped := trainModelIsStoppedBool
		trainModelIsStoppedBoolMutex.RUnlock()
		return isStopped
	}

	cancelButton := getWidgetCentered(widget.NewButtonWithIcon("Cancel", theme.CancelIcon(), func(){
		stopTrainModel()
		previousPage()
	}))

	page := container.NewVBox(title, widget.NewSeparator(), loadingBar, progressDetailsTitle, progressDetailsLabelCentered, estimatedTimeRemainingLabelCentered, widget.NewSeparator(), cancelButton)

	window.SetContent(page)

	trainModelFunction := func(){

		var processProgressMutex sync.RWMutex

		// This stores the amount of progress which has been completed (0-1)
		processProgress := float64(0)

		startUpdateTimeRemainingDisplayFunction := func(){

			// This function updates the estimated time remaining label binding
			// It only returns once the stopped flag is set or an error occurs
			updateTimeRemainingDisplayFunction := func()error{

				startTime := time.Now().Unix()

				for{
					if (getTrainModelIsStopped() == true){
						// User exited the process/Process has completed
						return nil
					}

					processProgressMutex.RLock()
					currentProcessProgress := processProgress
					processProgressMutex.RUnlock()

					if (currentProcessProgress == 0){
						estimatedTimeRemainingBinding.Set("Calculating required time...")
						time.Sleep(time.Second)
						continue
					}

					// We calculate how long we think it will take for the process to complete

					currentTime := time.Now().Unix()
					secondsElapsed := currentTime - startTime

					// processProgress is a float64 which stores the progress as a value between 0-1
					// To get the estimated total time the process will take, we divide the seconds elapsed by the proportion of progress
					// For example:
					// 0.1 (10%) at 10 seconds == Total process will take 100 seconds
					// 0.5 (50%) at 20 seconds == Total process will take 40 seconds

					totalSeconds := float64(secondsElapsed)/currentProcessProgress

					estimatedSecondsRemaining := int64(totalSeconds) - secondsElapsed

					estimatedTimeRemainingTranslated, err := helpers.ConvertUnixTimeDurationToUnitsTimeTranslated(estimatedSecondsRemaining, false)
					if (err != nil) { return err }

					estimatedTimeRemainingBinding.Set("Estimated Time Remaining: " + estimatedTimeRemainingTranslated)

					time.Sleep(time.Second)
				}
			}

			err := updateTimeRemainingDisplayFunction()
			if (err != nil){
				// We stop the training process and show the error
				stopTrainModel()
				setErrorEncounteredPage(window, err, previousPage)
				return
			}
		}

		go startUpdateTimeRemainingDisplayFunction()

		//Outputs:
		//	-bool: Process completed (true == was not stopped mid-way)
		//	-error
		trainModel := func()(bool, error){

			_, err := localFilesystem.CreateFolder("./TrainedModels")
			if (err != nil) { return false, err }

			trainingSetFilepathsList, _, err := getTrainingAndTestingDataFilepathLists(attributeName)
			if (err != nil) { return false, err }

			if (len(trainingSetFilepathsList) == 0){
				// Without this check, getNextTrainingDataFunction would index into an empty list and panic
				return false, errors.New("No training data found for attribute: " + attributeName)
			}

			// Now we deterministically randomize the order of the trainingSetFilepathsList
			// The generator uses a fixed seed, so the order is the same on every run

			pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))

			shuffleTrainingSetFilepathsList := func(){
				pseudorandomNumberGenerator.Shuffle(len(trainingSetFilepathsList), func(i int, j int){
					trainingSetFilepathsList[i], trainingSetFilepathsList[j] = trainingSetFilepathsList[j], trainingSetFilepathsList[i]
				})
			}

			shuffleTrainingSetFilepathsList()

			// We create a new neural network object to train
			neuralNetworkObject, err := geneticPrediction.GetNewUntrainedNeuralNetworkObject(attributeName)
			if (err != nil) { return false, err }

			// The number of rounds of training for the training data set
			totalQuantityOfRoundsToRun := 2

			quantityOfTrainingDatasInSet := len(trainingSetFilepathsList)

			// The total quantity of examples we will train on across all rounds
			quantityOfTrainingDatas := quantityOfTrainingDatasInSet * totalQuantityOfRoundsToRun
			quantityOfTrainingDatasString := helpers.ConvertIntToString(quantityOfTrainingDatas)

			// This keeps track of how many training rounds we have completed
			// With each round, we shuffle the training data list and train the model again
			trainingRoundsCompleted := 0

			// This keeps track of how far along we are in training within the current round
			trainingDataIndex := 0

			// This keeps track of how many examples we have trained during all rounds
			quantityOfExamplesTrained := 0

			// Outputs:
			//	-bool: User stopped training
			//	-bool: Another training data exists
			//	-geneticPrediction.TrainingData
			//	-error
			getNextTrainingDataFunction := func()(bool, bool, geneticPrediction.TrainingData, error){

				if (getTrainModelIsStopped() == true){
					// User exited the process
					return true, false, geneticPrediction.TrainingData{}, nil
				}

				if (trainingDataIndex == quantityOfTrainingDatasInSet){

					// We are done training this set
					trainingRoundsCompleted += 1

					if (trainingRoundsCompleted == totalQuantityOfRoundsToRun){
						// We are done training
						return false, false, geneticPrediction.TrainingData{}, nil
					}

					// We train another round
					trainingDataIndex = 0

					// We deterministically randomize the order of the training data for the next round
					shuffleTrainingSetFilepathsList()
				}

				trainingDataFilepath := trainingSetFilepathsList[trainingDataIndex]

				fileExists, fileContents, err := localFilesystem.GetFileContents(trainingDataFilepath)
				if (err != nil) { return false, false, geneticPrediction.TrainingData{}, err }
				if (fileExists == false){
					return false, false, geneticPrediction.TrainingData{}, errors.New("TrainingData file not found: " + trainingDataFilepath)
				}

				trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents)
				if (err != nil) { return false, false, geneticPrediction.TrainingData{}, err }

				trainingDataIndex += 1
				quantityOfExamplesTrained += 1

				quantityOfExamplesTrainedString := helpers.ConvertIntToString(quantityOfExamplesTrained)

				numberOfExamplesProgress := "Trained " + quantityOfExamplesTrainedString + "/" + quantityOfTrainingDatasString + " Examples"

				progressDetailsBinding.Set(numberOfExamplesProgress)

				newProgressFloat64 := float64(quantityOfExamplesTrained)/float64(quantityOfTrainingDatas)

				err = progressPercentageBinding.Set(newProgressFloat64)
				if (err != nil) { return false, false, geneticPrediction.TrainingData{}, err }

				// We share the progress with the time remaining display updater
				processProgressMutex.Lock()
				processProgress = newProgressFloat64
				processProgressMutex.Unlock()

				return false, true, trainingDataObject, nil
			}

			getAttributeIsNumericBool := func()(bool, error){

				switch attributeName{
					case "Height",
					"Autism",
					"Homosexualness",
					"Obesity":{
						return true, nil
					}
					case "Lactose Tolerance",
					"Eye Color":{
						return false, nil
					}
				}

				return false, errors.New("setStartAndMonitorTrainModelPage called with unknown attributeName: " + attributeName)
			}

			attributeIsNumeric, err := getAttributeIsNumericBool()
			if (err != nil) { return false, err }

			processCompleted, err := geneticPrediction.TrainNeuralNetwork(attributeName, attributeIsNumeric, neuralNetworkObject, getNextTrainingDataFunction)
			if (err != nil) { return false, err }
			if (processCompleted == false){
				return false, nil
			}

			// Network training is complete.
			// We now save the neural network as a .gob file

			neuralNetworkBytes, err := geneticPredictionModels.EncodeNeuralNetworkObjectToBytes(*neuralNetworkObject)
			if (err != nil) { return false, err }

			attributeNameWithoutWhitespaces := strings.ReplaceAll(attributeName, " ", "")

			neuralNetworkFilename := attributeNameWithoutWhitespaces + "Model.gob"

			err = localFilesystem.CreateOrOverwriteFile(neuralNetworkBytes, "./TrainedModels/", neuralNetworkFilename)
			if (err != nil) { return false, err }

			progressPercentageBinding.Set(1)

			return true, nil
		}

		processIsComplete, err := trainModel()

		// Training has ended (completed, cancelled or failed)
		// We set the stopped flag so the time remaining display updater goroutine exits instead of running forever
		stopTrainModel()

		if (err != nil){
			setErrorEncounteredPage(window, err, previousPage)
			return
		}
		if (processIsComplete == false){
			// User exited the page
			return
		}

		setTrainModelIsCompletePage(window)
	}

	go trainModelFunction()
}
// setTrainModelIsCompletePage displays the page shown after a model has finished training.
// The Exit button navigates to the home page.
func setTrainModelIsCompletePage(window fyne.Window){

	header := getBoldLabelCentered("Training Model Is Complete")

	completionMessage := getLabelCentered("Model training is complete!")
	savedLocationMessage := getLabelCentered("The model has been saved in the TrainedModels folder.")

	// Called when the user presses the Exit button
	goToHomePage := func(){
		setHomePage(window)
	}

	exitButton := widget.NewButtonWithIcon("Exit", theme.CancelIcon(), goToHomePage)
	exitButtonCentered := getWidgetCentered(exitButton)

	pageContent := container.NewVBox(
		header,
		widget.NewSeparator(),
		completionMessage,
		savedLocationMessage,
		exitButtonCentered,
	)

	window.SetContent(pageContent)
}
// setTestModelsPage displays the page where the user chooses which attribute model to test.
// The back button calls previousPage.
// Pressing the begin button with an attribute selected navigates to the testing progress page.
func setTestModelsPage(window fyne.Window, previousPage func()){

	// The testing page uses this to navigate back to this page
	returnToThisPage := func(){
		setTestModelsPage(window, previousPage)
	}

	header := getBoldLabelCentered("Test Models")

	backButton := getBackButtonCentered(previousPage)

	instructions1 := getLabelCentered("Press the button below to begin testing a genetic model.")
	instructions2 := getLabelCentered("This will test each neural network using user training data examples.")
	instructions3 := getLabelCentered("The testing data is not used to train the models.")
	instructions4 := getLabelCentered("The results of the testing will be displayed at the end.")
	instructions5 := getLabelCentered("The results will also be saved in the ModelAccuracies folder.")
	instructions6 := getLabelCentered("You must select a model to test.")

	modelSelector := widget.NewSelect([]string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"}, nil)

	// Called when the user presses the Begin Testing Model button
	onBeginTestingPressed := func(){

		if (modelSelector.SelectedIndex() >= 0){
			// An attribute is selected. We start testing its model.
			setStartAndMonitorTestModelPage(window, modelSelector.Selected, returnToThisPage)
			return
		}

		// Nothing is selected. We tell the user to select a model.
		noSelectionMessage := getLabelCentered("You must select a model to test.")
		dialog.ShowCustom("No Attribute Selected", "Close", container.NewVBox(noSelectionMessage), window)
	}

	beginButton := widget.NewButtonWithIcon("Begin Testing Model", theme.MediaPlayIcon(), onBeginTestingPressed)
	beginButtonCentered := getWidgetCentered(beginButton)

	modelSelectorCentered := getWidgetCentered(modelSelector)

	pageContent := container.NewVBox(
		header,
		backButton,
		widget.NewSeparator(),
		instructions1,
		instructions2,
		instructions3,
		instructions4,
		instructions5,
		instructions6,
		widget.NewSeparator(),
		modelSelectorCentered,
		widget.NewSeparator(),
		beginButtonCentered,
	)

	window.SetContent(pageContent)
}
// setStartAndMonitorTestModelPage displays a progress page and tests the trained model for attributeName.
//
// Inputs:
//	-fyne.Window: The window whose content is replaced
//	-string: Attribute name ("Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness" or "Obesity")
//	-func(): Page to navigate to if the user cancels (it is also passed to the error and results pages)
//
// Testing runs in a background goroutine which reads the model from ./TrainedModels/<AttributeNameWithoutSpaces>Model.gob,
// runs it against each file in the testing set, and writes the accuracy information to
// ./ModelAccuracies/<AttributeNameWithoutSpaces>ModelAccuracy.gob
// Discrete traits ("Eye Color", "Lactose Tolerance") and numeric attributes are measured differently, so each has its own routine.
func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, previousPage func()){

	title := getBoldLabelCentered("Testing Model")

	progressDetailsBinding := binding.NewString()
	progressPercentageBinding := binding.NewFloat()

	loadingBar := getWidgetCentered(widget.NewProgressBarWithData(progressPercentageBinding))

	progressDetailsTitle := getBoldLabelCentered("Progress Details:")

	progressDetailsLabel := widget.NewLabelWithData(progressDetailsBinding)
	progressDetailsLabel.TextStyle = fyne.TextStyle{
		Bold: false,
		Italic: true,
		Monospace: false,
	}
	progressDetailsLabelCentered := getWidgetCentered(progressDetailsLabel)

	// We set this bool to true to stop the testModel process
	var testModelIsStoppedBoolMutex sync.RWMutex
	testModelIsStoppedBool := false

	getTestModelIsStopped := func()bool{
		testModelIsStoppedBoolMutex.RLock()
		isStopped := testModelIsStoppedBool
		testModelIsStoppedBoolMutex.RUnlock()
		return isStopped
	}

	cancelButton := getWidgetCentered(widget.NewButtonWithIcon("Cancel", theme.CancelIcon(), func(){
		testModelIsStoppedBoolMutex.Lock()
		testModelIsStoppedBool = true
		testModelIsStoppedBoolMutex.Unlock()
		previousPage()
	}))

	page := container.NewVBox(title, widget.NewSeparator(), loadingBar, progressDetailsTitle, progressDetailsLabelCentered, widget.NewSeparator(), cancelButton)

	window.SetContent(page)

	getAttributeIsNumericBool := func()(bool, error){

		switch attributeName{
			case "Height",
			"Autism",
			"Homosexualness",
			"Obesity":{
				return true, nil
			}
			case "Lactose Tolerance",
			"Eye Color":{
				return false, nil
			}
		}

		return false, errors.New("setStartAndMonitorTestModelPage called with unknown attributeName: " + attributeName)
	}

	attributeIsNumeric, err := getAttributeIsNumericBool()
	if (err != nil) {
		setErrorEncounteredPage(window, err, previousPage)
		return
	}

	// This function returns the truncated integer percentage of numerator/denominator
	// It returns 0 if the denominator is 0
	// Without this check, the division would produce NaN, and converting NaN to an int yields an implementation-dependent value
	getPercentage := func(numerator float64, denominator float64)int{
		if (denominator == 0){
			return 0
		}
		proportion := numerator/denominator
		return int(100*proportion)
	}

	// This function updates the progress label and the loading bar
	// quantityOfExamplesTested must be the quantity of examples finished so far (1 after the first example)
	// totalQuantityOfExamples must be greater than 0
	updateProgress := func(quantityOfExamplesTested int, totalQuantityOfExamples int){

		quantityTestedString := helpers.ConvertIntToString(quantityOfExamplesTested)
		totalQuantityString := helpers.ConvertIntToString(totalQuantityOfExamples)

		progressDetailsBinding.Set("Tested " + quantityTestedString + "/" + totalQuantityString + " Examples")

		// We divide by the total quantity rather than the final index
		// Dividing by the final index would produce 0/0 when there is only 1 example
		progressPercentageBinding.Set(float64(quantityOfExamplesTested)/float64(totalQuantityOfExamples))
	}

	attributeNameWithoutWhitespaces := strings.ReplaceAll(attributeName, " ", "")

	trainedModelFilepath := goFilepath.Join("./TrainedModels/", attributeNameWithoutWhitespaces + "Model.gob")

	modelAccuracyFilename := attributeNameWithoutWhitespaces + "ModelAccuracy.gob"

	// This function tests the model of a Discrete trait
	testDiscreteTraitModelFunction := func(){

		//Outputs:
		//	-bool: Process completed (true == was not stopped mid-way)
		//	-trainedPredictionModels.DiscreteTraitPredictionAccuracyInfoMap
		//	-error
		testModel := func()(bool, trainedPredictionModels.DiscreteTraitPredictionAccuracyInfoMap, error){

			type TraitAccuracyStatisticsValue struct{

				// This stores the quantity of examples of this outcome
				QuantityOfExamples int

				// This stores the quantity of predictions that were made for this outcome
				// In other words: The quantity of instances where our model predicted this outcome
				QuantityOfPredictions int

				// This stores the quantity of predictions that were correct when the genome had this outcome
				QuantityOfCorrectGenomePredictions int

				// This stores the quantity of predictions that were correct when the model predicted this outcome
				QuantityOfCorrectOutcomePredictions int
			}

			// We use this map to count up the information about predictions
			// We use information from this map to construct the final accuracy information map
			traitPredictionInfoMap := make(map[trainedPredictionModels.DiscreteTraitOutcomeInfo]TraitAccuracyStatisticsValue)

			_, testingSetFilepathsList, err := getTrainingAndTestingDataFilepathLists(attributeName)
			if (err != nil) { return false, nil, err }

			// We read the trained model for this trait

			fileExists, fileContents, err := localFilesystem.GetFileContents(trainedModelFilepath)
			if (err != nil) { return false, nil, err }
			if (fileExists == false){
				return false, nil, errors.New("TrainedModel not found: " + trainedModelFilepath)
			}

			neuralNetworkObject, err := geneticPredictionModels.DecodeBytesToNeuralNetworkObject(fileContents)
			if (err != nil) { return false, nil, err }

			quantityOfTestingExamples := len(testingSetFilepathsList)

			for index, filePath := range testingSetFilepathsList{

				if (getTestModelIsStopped() == true){
					// User exited the process
					return false, nil, nil
				}

				fileExists, fileContents, err := localFilesystem.GetFileContents(filePath)
				if (err != nil) { return false, nil, err }
				if (fileExists == false){
					return false, nil, errors.New("TrainingData file not found: " + filePath)
				}

				trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents)
				if (err != nil) { return false, nil, err }

				trainingDataInputLayer := trainingDataObject.InputLayer
				trainingDataExpectedOutputLayer := trainingDataObject.OutputLayer

				predictionLayer, err := geneticPrediction.GetNeuralNetworkRawPrediction(&neuralNetworkObject, false, trainingDataInputLayer)
				if (err != nil) { return false, nil, err }

				if (len(trainingDataExpectedOutputLayer) != len(predictionLayer)){
					return false, nil, errors.New("Neural network prediction output length does not match expected output length.")
				}

				correctOutcomeName, err := geneticPrediction.GetDiscreteOutcomeNameFromOutputLayer(attributeName, true, trainingDataExpectedOutputLayer)
				if (err != nil) { return false, nil, err }

				predictedOutcomeName, err := geneticPrediction.GetDiscreteOutcomeNameFromOutputLayer(attributeName, true, predictionLayer)
				if (err != nil) { return false, nil, err }

				predictionIsCorrect := (predictedOutcomeName == correctOutcomeName)

				numberOfKnownLoci, numberOfKnownAndPhasedLoci, numberOfLoci, err := geneticPrediction.GetLociInfoFromNetworkInputLayer(trainingDataInputLayer)
				if (err != nil) { return false, nil, err }

				percentageOfLociTested := getPercentage(float64(numberOfKnownLoci), float64(numberOfLoci))
				percentageOfPhasedLoci := getPercentage(float64(numberOfKnownAndPhasedLoci), float64(numberOfKnownLoci))

				{
					// We first add the information to the map for the correct outcome

					correctOutcomeInfo := trainedPredictionModels.DiscreteTraitOutcomeInfo{
						OutcomeName: correctOutcomeName,
						PercentageOfLociTested: percentageOfLociTested,
						PercentageOfPhasedLoci: percentageOfPhasedLoci,
					}

					// A missing map entry yields the zero value, which is the correct starting point
					correctOutcomeStatistics := traitPredictionInfoMap[correctOutcomeInfo]

					correctOutcomeStatistics.QuantityOfExamples += 1

					if (predictionIsCorrect == true){
						correctOutcomeStatistics.QuantityOfCorrectGenomePredictions += 1
					}

					traitPredictionInfoMap[correctOutcomeInfo] = correctOutcomeStatistics
				}
				{
					// We now add the information to the map for the predicted outcome
					// This must happen after the write above, because both keys are identical when the prediction is correct

					predictedOutcomeInfo := trainedPredictionModels.DiscreteTraitOutcomeInfo{
						OutcomeName: predictedOutcomeName,
						PercentageOfLociTested: percentageOfLociTested,
						PercentageOfPhasedLoci: percentageOfPhasedLoci,
					}

					predictedOutcomeStatistics := traitPredictionInfoMap[predictedOutcomeInfo]

					predictedOutcomeStatistics.QuantityOfPredictions += 1

					if (predictionIsCorrect == true){
						predictedOutcomeStatistics.QuantityOfCorrectOutcomePredictions += 1
					}

					traitPredictionInfoMap[predictedOutcomeInfo] = predictedOutcomeStatistics
				}

				updateProgress(index+1, quantityOfTestingExamples)
			}

			// Now we construct the TraitAccuracyInfoMap
			// This map stores the accuracy for each outcome

			traitPredictionAccuracyInfoMap := make(map[trainedPredictionModels.DiscreteTraitOutcomeInfo]trainedPredictionModels.DiscreteTraitPredictionAccuracyInfo)

			for traitPredictionInfo, value := range traitPredictionInfoMap{

				quantityOfExamples := value.QuantityOfExamples
				quantityOfPredictions := value.QuantityOfPredictions
				quantityOfCorrectGenomePredictions := value.QuantityOfCorrectGenomePredictions
				quantityOfCorrectOutcomePredictions := value.QuantityOfCorrectOutcomePredictions

				if (quantityOfCorrectGenomePredictions > quantityOfExamples){
					return false, nil, errors.New("traitPredictionInfoMap contains quantityOfCorrectGenomePredictions > quantityOfExamples")
				}
				if (quantityOfCorrectOutcomePredictions > quantityOfPredictions){
					return false, nil, errors.New("traitPredictionInfoMap contains quantityOfCorrectOutcomePredictions > quantityOfPredictions")
				}

				newTraitPredictionAccuracyInfo := trainedPredictionModels.DiscreteTraitPredictionAccuracyInfo{
					QuantityOfExamples: quantityOfExamples,
					QuantityOfPredictions: quantityOfPredictions,
				}

				// Each probability is only set if at least 1 example/prediction exists for it
				if (quantityOfExamples > 0){
					newTraitPredictionAccuracyInfo.ProbabilityOfCorrectGenomePrediction = getPercentage(float64(quantityOfCorrectGenomePredictions), float64(quantityOfExamples))
				}
				if (quantityOfPredictions > 0){
					newTraitPredictionAccuracyInfo.ProbabilityOfCorrectOutcomePrediction = getPercentage(float64(quantityOfCorrectOutcomePredictions), float64(quantityOfPredictions))
				}

				traitPredictionAccuracyInfoMap[traitPredictionInfo] = newTraitPredictionAccuracyInfo
			}

			// Testing is complete.
			// We save the info map as a file in the ModelAccuracies folder

			fileBytes, err := trainedPredictionModels.EncodeDiscreteTraitPredictionAccuracyInfoMapToBytes(traitPredictionAccuracyInfoMap)
			if (err != nil) { return false, nil, err }

			_, err = localFilesystem.CreateFolder("./ModelAccuracies")
			if (err != nil) { return false, nil, err }

			err = localFilesystem.CreateOrOverwriteFile(fileBytes, "./ModelAccuracies/", modelAccuracyFilename)
			if (err != nil) { return false, nil, err }

			progressPercentageBinding.Set(1)

			return true, traitPredictionAccuracyInfoMap, nil
		}

		processIsComplete, traitPredictionAccuracyInfoMap, err := testModel()
		if (err != nil){
			setErrorEncounteredPage(window, err, previousPage)
			return
		}
		if (processIsComplete == false){
			// User exited the page
			return
		}

		setViewModelTestingDiscreteTraitResultsPage(window, attributeName, traitPredictionAccuracyInfoMap, previousPage)
	}

	// This function tests the model of a Numeric attribute
	testNumericAttributeModelFunction := func(){

		//Outputs:
		//	-bool: Process completed (true == was not stopped mid-way)
		//	-trainedPredictionModels.NumericAttributePredictionAccuracyInfoMap
		//	-error
		testModel := func()(bool, trainedPredictionModels.NumericAttributePredictionAccuracyInfoMap, error){

			// We use this map to count up the information about predictions
			// We use information from this map to construct the final accuracy information map
			// Map Structure: NumericAttributePredictionInfo -> []float64 (List of distances for each prediction)
			attributePredictionInfoMap := make(map[trainedPredictionModels.NumericAttributePredictionInfo][]float64)

			_, testingSetFilepathsList, err := getTrainingAndTestingDataFilepathLists(attributeName)
			if (err != nil) { return false, nil, err }

			// We read the trained model for this attribute

			fileExists, fileContents, err := localFilesystem.GetFileContents(trainedModelFilepath)
			if (err != nil) { return false, nil, err }
			if (fileExists == false){
				return false, nil, errors.New("TrainedModel not found: " + trainedModelFilepath)
			}

			neuralNetworkObject, err := geneticPredictionModels.DecodeBytesToNeuralNetworkObject(fileContents)
			if (err != nil) { return false, nil, err }

			quantityOfTestingExamples := len(testingSetFilepathsList)

			for index, filePath := range testingSetFilepathsList{

				if (getTestModelIsStopped() == true){
					// User exited the process
					return false, nil, nil
				}

				fileExists, fileContents, err := localFilesystem.GetFileContents(filePath)
				if (err != nil) { return false, nil, err }
				if (fileExists == false){
					return false, nil, errors.New("TrainingData file not found: " + filePath)
				}

				trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents)
				if (err != nil) { return false, nil, err }

				trainingDataInputLayer := trainingDataObject.InputLayer
				trainingDataExpectedOutputLayer := trainingDataObject.OutputLayer

				if (len(trainingDataExpectedOutputLayer) != 1){
					return false, nil, errors.New("Neural network training data prediction output layer length is not 1.")
				}

				predictionLayer, err := geneticPrediction.GetNeuralNetworkRawPrediction(&neuralNetworkObject, true, trainingDataInputLayer)
				if (err != nil) { return false, nil, err }

				if (len(predictionLayer) != 1){
					return false, nil, errors.New("Neural network numeric prediction output layer length is not 1.")
				}

				correctOutcomeValue, err := geneticPrediction.GetNumericOutcomeValueFromOutputLayer(attributeName, trainingDataExpectedOutputLayer)
				if (err != nil) { return false, nil, err }

				predictedOutcomeValue, err := geneticPrediction.GetNumericOutcomeValueFromOutputLayer(attributeName, predictionLayer)
				if (err != nil) { return false, nil, err }

				numberOfKnownLoci, numberOfKnownAndPhasedLoci, numberOfLoci, err := geneticPrediction.GetLociInfoFromNetworkInputLayer(trainingDataInputLayer)
				if (err != nil) { return false, nil, err }

				percentageOfLociTested := getPercentage(float64(numberOfKnownLoci), float64(numberOfLoci))
				percentageOfPhasedLoci := getPercentage(float64(numberOfKnownAndPhasedLoci), float64(numberOfKnownLoci))

				newNumericAttributePredictionInfo := trainedPredictionModels.NumericAttributePredictionInfo{
					PercentageOfLociTested: percentageOfLociTested,
					PercentageOfPhasedLoci: percentageOfPhasedLoci,
				}

				distanceFromCorrectValue := math.Abs(predictedOutcomeValue - correctOutcomeValue)

				// Appending to a missing map entry (a nil slice) creates the list
				attributePredictionInfoMap[newNumericAttributePredictionInfo] = append(attributePredictionInfoMap[newNumericAttributePredictionInfo], distanceFromCorrectValue)

				updateProgress(index+1, quantityOfTestingExamples)
			}

			// Now we construct the AttributeAccuracyInfoMap
			// This map stores the accuracy for each QuantityOfKnownLoci/QuantityOfPhasedLoci

			attributePredictionAccuracyInfoMap := make(map[trainedPredictionModels.NumericAttributePredictionInfo]trainedPredictionModels.NumericAttributePredictionAccuracyRangesMap)

			for attributePredictionInfo, predictionDistancesList := range attributePredictionInfoMap{

				if (len(predictionDistancesList) == 0){
					return false, nil, errors.New("attributePredictionInfoMap contains empty predictionDistancesList.")
				}

				if (len(predictionDistancesList) < 5){
					// We don't have enough data to create an accuracyRanges map.
					continue
				}

				// Map Structure: Accuracy Percentage (AP) -> Amount needed to deviate from prediction
				// for the value to be accurate (AP)% of the time
				newNumericAttributePredictionAccuracyRangesMap := make(map[int]float64)

				// We sort the prediction distances list in ascending order
				slices.Sort(predictionDistancesList)

				// The list has at least 5 items, so finalDistanceIndex is at least 4
				finalDistanceIndex := len(predictionDistancesList) - 1

				for distanceIndex, distance := range predictionDistancesList{

					percentageOfPredictionsWithinDistance := getPercentage(float64(distanceIndex), float64(finalDistanceIndex))

					if (percentageOfPredictionsWithinDistance == 0){
						// 0% accuracy is not a useful metric for users
						continue
					}

					_, exists := newNumericAttributePredictionAccuracyRangesMap[percentageOfPredictionsWithinDistance]
					if (exists == true){
						// There exists a value for this percentage already
						// This happens because we convert a float64 to an int
						// The existing percentage must be smaller than our current percentage
						// We want to keep that smaller percentage
						// For example, we would rather keep the 15.1% value than the 15.8% value.
						continue
					}

					newNumericAttributePredictionAccuracyRangesMap[percentageOfPredictionsWithinDistance] = distance
				}

				attributePredictionAccuracyInfoMap[attributePredictionInfo] = newNumericAttributePredictionAccuracyRangesMap
			}

			// Testing is complete.
			// We save the info map as a file in the ModelAccuracies folder

			fileBytes, err := trainedPredictionModels.EncodeNumericAttributePredictionAccuracyInfoMapToBytes(attributePredictionAccuracyInfoMap)
			if (err != nil) { return false, nil, err }

			_, err = localFilesystem.CreateFolder("./ModelAccuracies")
			if (err != nil) { return false, nil, err }

			err = localFilesystem.CreateOrOverwriteFile(fileBytes, "./ModelAccuracies/", modelAccuracyFilename)
			if (err != nil) { return false, nil, err }

			progressPercentageBinding.Set(1)

			return true, attributePredictionAccuracyInfoMap, nil
		}

		processIsComplete, attributePredictionAccuracyInfoMap, err := testModel()
		if (err != nil){
			setErrorEncounteredPage(window, err, previousPage)
			return
		}
		if (processIsComplete == false){
			// User exited the page
			return
		}

		setViewModelTestingNumericAttributeResultsPage(window, attributeName, attributePredictionAccuracyInfoMap, previousPage)
	}

	if (attributeIsNumeric == false){
		// attribute is a Discrete trait
		go testDiscreteTraitModelFunction()
		return
	}

	// attribute is Numeric
	go testNumericAttributeModelFunction()
}
// This is a page to view the details of testing for a specific trait's model
func setViewModelTestingDiscreteTraitResultsPage(window fyne.Window, traitName string, traitAccuracyInfoMap trainedPredictionModels.DiscreteTraitPredictionAccuracyInfoMap, exitPage func()){
title := getBoldLabelCentered("Discrete Trait Prediction Accuracy Details")
exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), exitPage))
description1 := getLabelCentered("The results of the prediction accuracy for this trait are below.")
traitNameTitle := widget.NewLabel("Trait Name:")
traitNameLabel := getBoldLabel(traitName)
traitNameRow := container.NewHBox(layout.NewSpacer(), traitNameTitle, traitNameLabel, layout.NewSpacer())
description2 := getLabelCentered("Prediction accuracy values are a pair of Genome Accuracy/Outcome Accuracy.")
description3 := getLabelCentered("Genome Accuracy is the probability that the model will predict a genome's trait value correctly.")
description4 := getLabelCentered("Outcome Accuracy is the probability that a trait prediction that the model makes is correct.")
getResultsGrid := func()(*fyne.Container, error){
emptyLabel1 := widget.NewLabel("")
outcomeNameTitle := getItalicLabelCentered("Outcome Name")
predictionAccuracyTitle1 := getItalicLabelCentered("Prediction Accuracy")
knownLociLabel_0to33 := getItalicLabelCentered("0-33% Known Loci")
predictionAccuracyTitle2 := getItalicLabelCentered("Prediction Accuracy")
knownLociLabel_34to66 := getItalicLabelCentered("34-66% Known Loci")
predictionAccuracyTitle3 := getItalicLabelCentered("Prediction Accuracy")
knownLociLabel_67to100 := getItalicLabelCentered("67-100% Known Loci")
outcomeNameColumn := container.NewVBox(emptyLabel1, outcomeNameTitle, widget.NewSeparator())
predictionAccuracyColumn_0to33 := container.NewVBox(predictionAccuracyTitle1, knownLociLabel_0to33, widget.NewSeparator())
predictionAccuracyColumn_34to66 := container.NewVBox(predictionAccuracyTitle2, knownLociLabel_34to66, widget.NewSeparator())
predictionAccuracyColumn_67to100 := container.NewVBox(predictionAccuracyTitle3, knownLociLabel_67to100, widget.NewSeparator())
traitObject, err := traits.GetTraitObject(traitName)
if (err != nil) { return nil, err }
traitIsDiscreteOrNumeric := traitObject.DiscreteOrNumeric
if (traitIsDiscreteOrNumeric != "Discrete"){
return nil, errors.New("setViewModelTestingDiscreteTraitResultsPage called with non-discrete trait: " + traitName)
}
outcomeNamesList := traitObject.OutcomesList
for _, outcomeName := range outcomeNamesList{
outcomeNameLabel := getBoldLabelCentered(outcomeName)
// We use the below variables to sum up the accuracy percentages so we can average them
genomePredictionAccuracySum_0to33 := 0
genomeExampleCount_0to33 := 0
outcomePredictionAccuracySum_0to33 := 0
outcomePredictionCount_0to33 := 0
genomePredictionAccuracySum_34to66 := 0
genomeExampleCount_34to66 := 0
outcomePredictionAccuracySum_34to66 := 0
outcomePredictionCount_34to66 := 0
genomePredictionAccuracySum_67to100 := 0
genomeExampleCount_67to100 := 0
outcomePredictionAccuracySum_67to100 := 0
outcomePredictionCount_67to100 := 0
for traitOutcomeInfo, traitPredictionAccuracyInfo := range traitAccuracyInfoMap{
currentOutcomeName := traitOutcomeInfo.OutcomeName
if (currentOutcomeName != outcomeName){
continue
}
percentageOfLociTested := traitOutcomeInfo.PercentageOfLociTested
quantityOfExamples := traitPredictionAccuracyInfo.QuantityOfExamples
quantityOfPredictions := traitPredictionAccuracyInfo.QuantityOfPredictions
genomePredictionAccuracyPercentage := traitPredictionAccuracyInfo.ProbabilityOfCorrectGenomePrediction
outcomePredictionAccuracyPercentage := traitPredictionAccuracyInfo.ProbabilityOfCorrectOutcomePrediction
if (percentageOfLociTested <= 33){
genomePredictionAccuracySum_0to33 += (genomePredictionAccuracyPercentage * quantityOfExamples)
genomeExampleCount_0to33 += quantityOfExamples
outcomePredictionAccuracySum_0to33 += (outcomePredictionAccuracyPercentage * quantityOfPredictions)
outcomePredictionCount_0to33 += quantityOfPredictions
} else if (percentageOfLociTested > 33 && percentageOfLociTested <= 66){
genomePredictionAccuracySum_34to66 += (genomePredictionAccuracyPercentage * quantityOfExamples)
genomeExampleCount_34to66 += quantityOfExamples
outcomePredictionAccuracySum_34to66 += (outcomePredictionAccuracyPercentage * quantityOfPredictions)
outcomePredictionCount_34to66 += quantityOfPredictions
} else {
genomePredictionAccuracySum_67to100 += (genomePredictionAccuracyPercentage * quantityOfExamples)
genomeExampleCount_67to100 += quantityOfExamples
outcomePredictionAccuracySum_67to100 += (outcomePredictionAccuracyPercentage * quantityOfPredictions)
outcomePredictionCount_67to100 += quantityOfPredictions
}
}
getAverageAccuracyText := func(accuracySum int, predictionCount int)string{
if (predictionCount == 0){
return "Unknown"
}
averageAccuracy := accuracySum/predictionCount
averageAccuracyString := helpers.ConvertIntToString(averageAccuracy)
result := averageAccuracyString + "%"
return result
}
genomeAverageAccuracyText_0to33 := getAverageAccuracyText(genomePredictionAccuracySum_0to33, genomeExampleCount_0to33)
genomeAverageAccuracyText_34to66 := getAverageAccuracyText(genomePredictionAccuracySum_34to66, genomeExampleCount_34to66)
genomeAverageAccuracyText_67to100 := getAverageAccuracyText(genomePredictionAccuracySum_67to100, genomeExampleCount_67to100)
outcomeAverageAccuracyText_0to33 := getAverageAccuracyText(outcomePredictionAccuracySum_0to33, outcomePredictionCount_0to33)
outcomeAverageAccuracyText_34to66 := getAverageAccuracyText(outcomePredictionAccuracySum_34to66, outcomePredictionCount_34to66)
outcomeAverageAccuracyText_67to100 := getAverageAccuracyText(outcomePredictionAccuracySum_67to100, outcomePredictionCount_67to100)
averageAccuracyLabel_0to33 := getBoldLabelCentered(genomeAverageAccuracyText_0to33 + "/" + outcomeAverageAccuracyText_0to33)
averageAccuracyLabel_34to66 := getBoldLabelCentered(genomeAverageAccuracyText_34to66 + "/" + outcomeAverageAccuracyText_34to66)
averageAccuracyLabel_67to100 := getBoldLabelCentered(genomeAverageAccuracyText_67to100 + "/" + outcomeAverageAccuracyText_67to100)
outcomeNameColumn.Add(outcomeNameLabel)
predictionAccuracyColumn_0to33.Add(averageAccuracyLabel_0to33)
predictionAccuracyColumn_34to66.Add(averageAccuracyLabel_34to66)
predictionAccuracyColumn_67to100.Add(averageAccuracyLabel_67to100)
outcomeNameColumn.Add(widget.NewSeparator())
predictionAccuracyColumn_0to33.Add(widget.NewSeparator())
predictionAccuracyColumn_34to66.Add(widget.NewSeparator())
predictionAccuracyColumn_67to100.Add(widget.NewSeparator())
}
resultsGrid := container.NewHBox(layout.NewSpacer(), outcomeNameColumn, predictionAccuracyColumn_0to33, predictionAccuracyColumn_34to66, predictionAccuracyColumn_67to100, layout.NewSpacer())
return resultsGrid, nil
}
resultsGrid, err := getResultsGrid()
if (err != nil){
setErrorEncounteredPage(window, err, func(){setHomePage(window)})
return
}
page := container.NewVBox(title, exitButton, widget.NewSeparator(), description1, widget.NewSeparator(), traitNameRow, widget.NewSeparator(), description2, description3, description4, widget.NewSeparator(), resultsGrid)
window.SetContent(page)
}
// This is a page to view the details of testing for a numeric attribute's model
//
// The page shows a grid with one row for each probability-of-correct-prediction bucket
// (1%-9%, 10%-19%, ... 90%-100%), and one column for each known loci range (0-33%, 34-66%, 67-100%)
// Each cell is the average of the distances in attributeAccuracyInfoMap which belong to that bucket and loci range
// A cell shows "Unknown" if no distances exist for it
//
//Inputs:
//	-fyne.Window: The window whose content is set to the page
//	-string: The attribute name (shown on the page, and used to choose the value formatter)
//	-trainedPredictionModels.NumericAttributePredictionAccuracyInfoMap: The accuracy info to display
//	-func(): The function to call when the Exit button is pressed
func setViewModelTestingNumericAttributeResultsPage(window fyne.Window, attributeName string, attributeAccuracyInfoMap trainedPredictionModels.NumericAttributePredictionAccuracyInfoMap, exitPage func()){

	title := getBoldLabelCentered("Numeric Attribute Prediction Accuracy Details")

	exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), exitPage))

	description1 := getLabelCentered("The results of the prediction accuracy for this attribute are below.")

	attributeNameTitle := widget.NewLabel("Attribute Name:")
	attributeNameLabel := getBoldLabel(attributeName)
	attributeNameRow := container.NewHBox(layout.NewSpacer(), attributeNameTitle, attributeNameLabel, layout.NewSpacer())

	description2 := getLabelCentered("Each value is a range that the prediction must be widened by to be accurate X% of the time.")
	description3 := getLabelCentered("For example, for a height prediction to be accurate 90% of the time, allow a +/-10 cm range.")

	getResultsGrid := func()(*fyne.Container, error){

		probabilityOfTitle := getItalicLabelCentered("Probability Of")
		correctPredictionTitle := getItalicLabelCentered("Correct Prediction")

		accuracyRangeTitle1 := getItalicLabelCentered("Accuracy Range")
		knownLociLabel_0to33 := getItalicLabelCentered("0-33% Known Loci")

		accuracyRangeTitle2 := getItalicLabelCentered("Accuracy Range")
		knownLociLabel_34to66 := getItalicLabelCentered("34-66% Known Loci")

		accuracyRangeTitle3 := getItalicLabelCentered("Accuracy Range")
		knownLociLabel_67to100 := getItalicLabelCentered("67-100% Known Loci")

		// The grid is built from 4 VBox columns which are joined by an HBox
		// Each column must receive the same number of rows
		probabilityOfCorrectPredictionColumn := container.NewVBox(probabilityOfTitle, correctPredictionTitle, widget.NewSeparator())
		accuracyRangeColumn_0to33 := container.NewVBox(accuracyRangeTitle1, knownLociLabel_0to33, widget.NewSeparator())
		accuracyRangeColumn_34to66 := container.NewVBox(accuracyRangeTitle2, knownLociLabel_34to66, widget.NewSeparator())
		accuracyRangeColumn_67to100 := container.NewVBox(accuracyRangeTitle3, knownLociLabel_67to100, widget.NewSeparator())

		// We get the formatter for the distance values
		// This converts raw predictions to formatted values
		// Example: 100 -> "100 centimeters"
		getAttributeValueFormatter := func()(func(float64, bool)(string, error), error){

			switch attributeName{
				// These names must match the attribute names used elsewhere in this file
				// (see getTrainingAndTestingDataFilepathLists)
				case "Homosexualness",
				"Height":{

					traitObject, err := traits.GetTraitObject(attributeName)
					if (err != nil) { return nil, err }

					numericValueFormatter := traitObject.NumericValueFormatter

					return numericValueFormatter, nil
				}
			}

			// attribute is a polygenic disease
			result := func(inputValue float64, _ bool)(string, error){

				// Input value is a value between 0 and 10
				inputValueFormatted := helpers.ConvertIntToString(int(inputValue))

				return inputValueFormatted, nil
			}

			return result, nil
		}

		attributeValueFormatter, err := getAttributeValueFormatter()
		if (err != nil){ return nil, err }

		// We add one row per probability bucket
		// The bucket minimums are: 1, 10, 20, 30, ... 90
		probabilityMinimumRange := 1

		for {
			if (probabilityMinimumRange == 100){
				break
			}

			// The buckets must not overlap, otherwise a percentage would be counted in two rows
			// Buckets: 1-9, 10-19, 20-29, ... 80-89, 90-100
			getProbabilityMaximumRange := func()int{
				if (probabilityMinimumRange == 1){
					// The next bucket starts at 10
					return 9
				}
				if (probabilityMinimumRange == 90){
					// The final bucket includes 100
					return 100
				}
				probabilityMaximumRange := probabilityMinimumRange + 9
				return probabilityMaximumRange
			}

			probabilityMaximumRange := getProbabilityMaximumRange()

			probabilityMinimumRangeString := helpers.ConvertIntToString(probabilityMinimumRange)
			probabilityMaximumRangeString := helpers.ConvertIntToString(probabilityMaximumRange)

			probabilityOfCorrectPredictionRangeFormatted := probabilityMinimumRangeString + "% - " + probabilityMaximumRangeString + "%"

			probabilityOfCorrectPredictionRangeLabel := getBoldLabelCentered(probabilityOfCorrectPredictionRangeFormatted)

			// We use the below variables to sum up the accuracy distances so we can average them
			predictionAccuracyDistancesSum_0to33 := float64(0)
			distancesCount_0to33 := 0

			predictionAccuracyDistancesSum_34to66 := float64(0)
			distancesCount_34to66 := 0

			predictionAccuracyDistancesSum_67to100 := float64(0)
			distancesCount_67to100 := 0

			for attributeOutcomeInfo, attributePredictionAccuracyRangesMap := range attributeAccuracyInfoMap{

				percentageOfLociTested := attributeOutcomeInfo.PercentageOfLociTested

				for percentageCorrect, distance := range attributePredictionAccuracyRangesMap{

					if (percentageCorrect < probabilityMinimumRange || percentageCorrect > probabilityMaximumRange){
						// This entry belongs to a different row
						continue
					}

					if (percentageOfLociTested <= 33){
						predictionAccuracyDistancesSum_0to33 += distance
						distancesCount_0to33 += 1
					} else if (percentageOfLociTested > 33 && percentageOfLociTested <= 66){
						predictionAccuracyDistancesSum_34to66 += distance
						distancesCount_34to66 += 1
					} else {
						predictionAccuracyDistancesSum_67to100 += distance
						distancesCount_67to100 += 1
					}
				}
			}

			// Returns the formatted average distance, or "Unknown" if there are no distances to average
			getAverageAccuracyText := func(distancesSum float64, distancesCount int)(string, error){

				if (distancesCount == 0){
					return "Unknown", nil
				}

				averageDistance := distancesSum/float64(distancesCount)

				averageDistanceFormatted, err := attributeValueFormatter(averageDistance, false)
				if (err != nil) { return "", err }

				result := "+/- " + averageDistanceFormatted

				return result, nil
			}

			averageDistanceText_0to33, err := getAverageAccuracyText(predictionAccuracyDistancesSum_0to33, distancesCount_0to33)
			if (err != nil){ return nil, err }

			averageDistanceText_34to66, err := getAverageAccuracyText(predictionAccuracyDistancesSum_34to66, distancesCount_34to66)
			if (err != nil){ return nil, err }

			averageDistanceText_67to100, err := getAverageAccuracyText(predictionAccuracyDistancesSum_67to100, distancesCount_67to100)
			if (err != nil){ return nil, err }

			averageDistanceLabel_0to33 := getBoldLabelCentered(averageDistanceText_0to33)
			averageDistanceLabel_34to66 := getBoldLabelCentered(averageDistanceText_34to66)
			averageDistanceLabel_67to100 := getBoldLabelCentered(averageDistanceText_67to100)

			probabilityOfCorrectPredictionColumn.Add(probabilityOfCorrectPredictionRangeLabel)
			accuracyRangeColumn_0to33.Add(averageDistanceLabel_0to33)
			accuracyRangeColumn_34to66.Add(averageDistanceLabel_34to66)
			accuracyRangeColumn_67to100.Add(averageDistanceLabel_67to100)

			probabilityOfCorrectPredictionColumn.Add(widget.NewSeparator())
			accuracyRangeColumn_0to33.Add(widget.NewSeparator())
			accuracyRangeColumn_34to66.Add(widget.NewSeparator())
			accuracyRangeColumn_67to100.Add(widget.NewSeparator())

			// Advance to the next bucket minimum (1 -> 10 -> 20 -> ... -> 90 -> 100)
			if (probabilityMinimumRange == 1){
				probabilityMinimumRange = 10
			} else {
				probabilityMinimumRange += 10
			}
		}

		resultsGrid := container.NewHBox(layout.NewSpacer(), probabilityOfCorrectPredictionColumn, accuracyRangeColumn_0to33, accuracyRangeColumn_34to66, accuracyRangeColumn_67to100, layout.NewSpacer())

		return resultsGrid, nil
	}

	resultsGrid, err := getResultsGrid()
	if (err != nil){
		setErrorEncounteredPage(window, err, func(){setHomePage(window)})
		return
	}

	page := container.NewVBox(title, exitButton, widget.NewSeparator(), description1, widget.NewSeparator(), attributeNameRow, widget.NewSeparator(), description2, description3, widget.NewSeparator(), resultsGrid)

	pageScrollable := container.NewVScroll(page)

	window.SetContent(pageScrollable)
}
// This function returns a list of training data and testing data filepaths for an attribute.
// The files are read from the ./TrainingData/<attribute name without spaces>/ folder.
// The user identifiers are sorted and then shuffled with a fixed seed, so the output order is the same on every run.
// The final 200 users in the shuffled order are set aside for testing; all other users are used for training.
//Outputs:
//	-[]string: List of training data filepaths (deterministic order)
//	-[]string: List of testing data filepaths (deterministic order)
//	-error: Folder is unreadable/empty/corrupt, attribute is unknown, file count is unexpected, a filename is invalid, or fewer than 250 users exist
func getTrainingAndTestingDataFilepathLists(attributeName string)([]string, []string, error){

	attributeNameWithoutWhitespaces := strings.ReplaceAll(attributeName, " ", "")

	trainingDataFolderpath := goFilepath.Join("./TrainingData/", attributeNameWithoutWhitespaces)

	filesList, err := os.ReadDir(trainingDataFolderpath)
	if (err != nil) { return nil, nil, err }

	// This map stores the file name for each training data
	trainingDataFilenamesMap := make(map[string]struct{})

	for _, filesystemObject := range filesList{

		filepathIsFolder := filesystemObject.IsDir()
		if (filepathIsFolder == true){
			// Folder is corrupt
			return nil, nil, errors.New("Training data is corrupt for attribute: " + attributeName)
		}

		fileName := filesystemObject.Name()

		trainingDataFilenamesMap[fileName] = struct{}{}
	}

	numberOfTrainingDataFiles := len(trainingDataFilenamesMap)
	if (numberOfTrainingDataFiles == 0){
		return nil, nil, errors.New("No training data exists for attribute: " + attributeName)
	}

	// We check the file count against a hardcoded expected count for each attribute
	getNumberOfExpectedTrainingDatas := func()(int, error){

		switch attributeName{
			case "Eye Color":{
				return 149894, nil
			}
			case "Lactose Tolerance":{
				return 24872, nil
			}
			case "Height":{
				return 92281, nil
			}
			case "Autism":{
				return 32118, nil
			}
			case "Homosexualness":{
				return 14500, nil
			}
			case "Obesity":{
				return 24009, nil
			}
		}

		return 0, errors.New("Unknown attributeName: " + attributeName)
	}

	numberOfExpectedTrainingDatas, err := getNumberOfExpectedTrainingDatas()
	if (err != nil){ return nil, nil, err }

	if (numberOfTrainingDataFiles != numberOfExpectedTrainingDatas){

		numberOfTrainingDataFilesString := helpers.ConvertIntToString(numberOfTrainingDataFiles)

		return nil, nil, errors.New(attributeName + " quantity of training datas is unexpected: " + numberOfTrainingDataFilesString)
	}

	// We sort the training data to be in a deterministically random order
	// This allows us to train the neural network in the same order each time
	// We do this so we can generate deterministic models which are identical byte-for-byte

	// We have to set aside 200 user's training datas for testing the neural network
	//
	// We have to remove them per-user because each user has 110 training datas.
	// Otherwise, we would be training and testing on data from the same users.
	// We need to test with users that the models were never trained upon.

	// First we extract the user identifiers from the data

	userIdentifiersMap := make(map[int]struct{})

	for trainingDataFilename := range trainingDataFilenamesMap{

		// Example filepath format: "User4680_TrainingData_89.gob"

		// We require the "User" prefix to exist
		// A file without the prefix could never match the filenames we reconstruct below, so it would be silently skipped
		trimmedFilename, userPrefixExists := strings.CutPrefix(trainingDataFilename, "User")
		if (userPrefixExists == false){
			return nil, nil, errors.New("Invalid trainingData filename: " + trainingDataFilename)
		}

		userIdentifierString, _, underscoreExists := strings.Cut(trimmedFilename, "_")
		if (underscoreExists == false){
			return nil, nil, errors.New("Invalid trainingData filename: " + trainingDataFilename)
		}

		userIdentifier, err := helpers.ConvertStringToInt(userIdentifierString)
		if (err != nil){
			return nil, nil, errors.New("Invalid trainingData filename: " + trainingDataFilename)
		}

		userIdentifiersMap[userIdentifier] = struct{}{}
	}

	userIdentifiersList := helpers.GetListOfMapKeys(userIdentifiersMap)

	// We sort the user identifiers list in ascending order
	// Map iteration order is random, so we need a fixed starting order before shuffling
	slices.Sort(userIdentifiersList)

	// Now we deterministically randomize the order of the user identifiers list
	// The generator uses a fixed seed, so the shuffle is identical on every run
	pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))

	pseudorandomNumberGenerator.Shuffle(len(userIdentifiersList), func(i int, j int){
		userIdentifiersList[i], userIdentifiersList[j] = userIdentifiersList[j], userIdentifiersList[i]
	})

	trainingSetFilepathsList := make([]string, 0)
	testingSetFilepathsList := make([]string, 0)

	numberOfUsers := len(userIdentifiersList)
	if (numberOfUsers < 250){
		return nil, nil, errors.New("Too few training data examples for attribute: " + attributeName)
	}

	// We use 200 users for testing (validation), so we don't train using them
	numberOfTrainingUsers := numberOfUsers - 200

	for index, userIdentifier := range userIdentifiersList{

		// Example filepath format: "User4680_TrainingData_89.gob"

		userIdentifierString := helpers.ConvertIntToString(userIdentifier)

		trainingDataFilenamePrefix := "User" + userIdentifierString + "_TrainingData_"

		for k:=1; k <= 110; k++{

			kString := helpers.ConvertIntToString(k)

			trainingDataFilename := trainingDataFilenamePrefix + kString + ".gob"

			_, fileExists := trainingDataFilenamesMap[trainingDataFilename]
			if (fileExists == false){
				// Some training datas don't exist due to how training datas are randomly created
				// Sometimes, no alleles exist, so we skip creating the training data
				continue
			}

			trainingDataFilepath := goFilepath.Join(trainingDataFolderpath, trainingDataFilename)

			// Users before the cutoff index are training users, the rest are testing users
			if (index < numberOfTrainingUsers){
				trainingSetFilepathsList = append(trainingSetFilepathsList, trainingDataFilepath)
			} else {
				testingSetFilepathsList = append(testingSetFilepathsList, trainingDataFilepath)
			}
		}
	}

	return trainingSetFilepathsList, testingSetFilepathsList, nil
}
// We use this to define a custom fyne theme
// It wraps another theme, which is used for every value that we do not override
// The Color method overrides a few colors, and the Size method increases the text size
type customTheme struct{
// The wrapped theme which the Color, Font, Icon and Size methods delegate to
defaultTheme fyne.Theme
}
// Returns a customTheme which wraps the fyne light theme
// The light theme supplies every value which the customTheme methods do not override
func getCustomFyneTheme()fyne.Theme{

	return customTheme{
		defaultTheme: theme.LightTheme(),
	}
}
// This function provides the colors for our custom fyne theme
// A handful of colors are replaced, and every other color is taken from the wrapped default theme
// If an override color code cannot be converted to a color, the default theme's color is used instead
func (input customTheme)Color(colorName fyne.ThemeColorName, variant fyne.ThemeVariant)color.Color{

	// This is the hex color code of the override color, or "" if the color has no hex override
	overrideColorCode := ""

	switch colorName{
		case theme.ColorNameForeground,
		theme.ColorNameSeparator:{
			// The foreground and the separators are pure black
			return color.Black
		}
		case theme.ColorNameInputBackground:{
			// Background of input elements such as text entries
			overrideColorCode = "b3b3b3"
		}
		case theme.ColorNameButton:{
			// Buttons
			overrideColorCode = "d8d8d8"
		}
		case theme.ColorNamePlaceHolder:{
			// Placeholder text
			overrideColorCode = "4d4d4d"
		}
	}

	if (overrideColorCode != ""){
		overrideColor, err := imagery.GetColorObjectFromColorCode(overrideColorCode)
		if (err == nil){
			return overrideColor
		}
		// Conversion failed, so we fall back to the default color below
	}

	// We will use the default color for this theme
	return input.defaultTheme.Color(colorName, variant)
}
// Fonts are not customized
// We return the font of the wrapped default theme
func (input customTheme)Font(style fyne.TextStyle)fyne.Resource{

	return input.defaultTheme.Font(style)
}
// Icons are not customized
// We return the icon of the wrapped default theme
func (input customTheme)Icon(iconName fyne.ThemeIconName)fyne.Resource{

	return input.defaultTheme.Icon(iconName)
}
// This function provides the sizes for our custom fyne theme
// Every size is taken from the wrapped default theme, except the text size, which is scaled up by 1.08
func (input customTheme)Size(name fyne.ThemeSizeName)float32{

	defaultSize := input.defaultTheme.Size(name)

	if (name != theme.SizeNameText){
		// We only change the text size
		return defaultSize
	}

	// After fyne v2.3.0, text labels are no longer the same height as buttons
	// We enlarge the text so that a text label and a button have the same height
	// This is necessary because our grids are built from multiple VBoxes which are joined by an HBox
	//
	// A different way of building grids would make this workaround unnecessary
	// Example: Create a new grid type: container.NewThinGrid?
	//  -Each column is only as wide as the widest element within it
	//  -Separators can be added between rows (grid.ShowRowLines = true) or between columns (grid.ShowColumnLines = true)
	//  -Borders can be added (grid.ShowTopBorder = true, grid.ShowBottomBorder = true, grid.ShowLeftBorder = true, grid.ShowRightBorder = true)
	// Using a different grid type is the solution we need to eventually use
	// Then, we can offer the user an option to increase the text size globally, and all grids will still render correctly

	enlargedTextSize := defaultSize * 1.08

	return enlargedTextSize
}