// createGeneticModels.go provides an interface to create genetic prediction models
// These are neural networks which predict traits such as eye color from raw genome files
// The OpenSNP dataset is used, and more datasets will be added in the future.
// You must download the dataset and extract it. The instructions are described in the utility.
package main
import ""
import ""
import ""
import ""
import ""
import ""
import ""
import ""
import "seekia/resources/geneticReferences/traits"
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/internal/encoding"
import "seekia/internal/genetics/locusValue"
import "seekia/internal/genetics/prepareRawGenomes"
import "seekia/internal/genetics/readRawGenomes"
import "seekia/internal/genetics/geneticPrediction"
import "seekia/internal/helpers"
import "seekia/internal/imagery"
import "seekia/internal/localFilesystem"
import "seekia/internal/genetics/readBiobankData"
import "errors"
import "crypto/sha256"
import "bytes"
import "image/color"
import "io"
import "os"
import "strings"
import "sync"
import "slices"
import mathRand "math/rand/v2"
import goFilepath "path/filepath"
func main(){
app := app.New()
customTheme := getCustomFyneTheme()
window := app.NewWindow("Seekia - Create Genetic Models Utility")
windowSize := fyne.NewSize(600, 600)
func getWidgetCentered(widget fyne.Widget)*fyne.Container{
widgetCentered := container.NewHBox(layout.NewSpacer(), widget, layout.NewSpacer())
return widgetCentered
func getLabelCentered(text string) *fyne.Container{
label := widget.NewLabel(text)
labelCentered := container.NewHBox(layout.NewSpacer(), label, layout.NewSpacer())
return labelCentered
func getBoldLabel(text string) fyne.Widget{
titleStyle := fyne.TextStyle{
Bold: true,
Italic: false,
Monospace: false,
boldLabel := widget.NewLabelWithStyle(text, fyne.TextAlign(fyne.TextAlignCenter), titleStyle)
return boldLabel
func getItalicLabel(text string) fyne.Widget{
italicTextStyle := fyne.TextStyle{
Bold: false,
Italic: true,
Monospace: false,
italicLabel := widget.NewLabelWithStyle(text, fyne.TextAlign(fyne.TextAlignCenter), italicTextStyle)
return italicLabel
func getBoldLabelCentered(inputText string)*fyne.Container{
boldLabel := getBoldLabel(inputText)
boldLabelCentered := container.NewHBox(layout.NewSpacer(), boldLabel, layout.NewSpacer())
return boldLabelCentered
func getItalicLabelCentered(inputText string)*fyne.Container{
italicLabel := getItalicLabel(inputText)
italicLabelCentered := container.NewHBox(layout.NewSpacer(), italicLabel, layout.NewSpacer())
return italicLabelCentered
func showUnderConstructionDialog(window fyne.Window){
dialogTitle := "Under Construction"
dialogMessageA := getLabelCentered("Seekia is under construction.")
dialogMessageB := getLabelCentered("This page/feature needs to be built.")
dialogContent := container.NewVBox(dialogMessageA, dialogMessageB)
dialog.ShowCustom(dialogTitle, "Close", dialogContent, window)
func getBackButtonCentered(previousPage func())*fyne.Container{
backButton := getWidgetCentered(widget.NewButtonWithIcon("Go Back", theme.NavigateBackIcon(), previousPage))
return backButton
func setErrorEncounteredPage(window fyne.Window, err error, previousPage func()){
title := getBoldLabelCentered("Error Encountered")
backButton := getBackButtonCentered(previousPage)
description1 := getLabelCentered("Something went wrong. Report this error to Seekia developers.")
header := container.NewVBox(title, backButton, widget.NewSeparator(), description1, widget.NewSeparator())
getErrorString := func()string{
if (err == nil){
return "No nav bar error encountered page called with nil error."
errorString := err.Error()
return errorString
errorString := getErrorString()
errorLabel := widget.NewLabel(errorString)
errorLabel.Wrapping = 3
errorLabel.Alignment = 1
errorLabel.TextStyle = fyne.TextStyle{
Bold: true,
Italic: false,
Monospace: false,
//TODO: Add copyable toggle
page := container.NewBorder(header, nil, nil, nil, errorLabel)
// This loading screen shows no progress, so it should only be used when retrieving progress is impossible
func setLoadingScreen(window fyne.Window, pageTitle string, loadingText string){
title := getBoldLabelCentered(pageTitle)
loadingLabel := getWidgetCentered(getItalicLabel(loadingText))
progressBar := getWidgetCentered(widget.NewProgressBarInfinite())
pageContent := container.NewVBox(title, loadingLabel, progressBar)
page := container.NewCenter(pageContent)
func setHomePage(window fyne.Window){
currentPage := func(){setHomePage(window)}
title := getBoldLabelCentered("Create Genetic Models Utility")
description1 := getLabelCentered("This utility is used to create the genetic prediction models.")
description2 := getLabelCentered("These models are used to predict traits such as eye color from raw genome files.")
description3 := getLabelCentered("Seekia aims to have open source and reproducible genetic prediction technology.")
step1Label := getLabelCentered("Step 1:")
downloadTrainingDataButton := getWidgetCentered(widget.NewButton("Download Training Data", func(){
setDownloadTrainingDataPage(window, currentPage)
step2Label := getLabelCentered("Step 2:")
extractTrainingDataButton := getWidgetCentered(widget.NewButton("Extract Training Data", func(){
setExtractTrainingDataPage(window, currentPage)
step3Label := getLabelCentered("Step 3:")
createTrainingDataButton := getWidgetCentered(widget.NewButton("Create Training Data", func(){
setCreateTrainingDataPage(window, currentPage)
step4Label := getLabelCentered("Step 4:")
trainModelsButton := getWidgetCentered(widget.NewButton("Train Models", func(){
setTrainModelsPage(window, currentPage)
step5Label := getLabelCentered("Step 5:")
testModelsButton := getWidgetCentered(widget.NewButton("Test Models", func(){
setTestModelsPage(window, currentPage)
//TODO: A page to verify the checksums of the generated .gob models
page := container.NewVBox(title, widget.NewSeparator(), description1, description2, description3, widget.NewSeparator(), step1Label, downloadTrainingDataButton, widget.NewSeparator(), step2Label, extractTrainingDataButton, widget.NewSeparator(), step3Label, createTrainingDataButton, widget.NewSeparator(), step4Label, trainModelsButton, widget.NewSeparator(), step5Label, testModelsButton)
func setDownloadTrainingDataPage(window fyne.Window, previousPage func()){
currentPage := func(){setDownloadTrainingDataPage(window, previousPage)}
title := getBoldLabelCentered("Download Training Data")
backButton := getBackButtonCentered(previousPage)
description1 := getLabelCentered("You must download the data dump file.")
description2 := getLabelCentered("This is a .tar.gz file which was created in August of 2023.")
description3 := getLabelCentered("It will be hosted on IPFS, a decentralized data sharing network.")
description4 := getLabelCentered("You must use an IPFS client to download the file.")
description5 := getLabelCentered("You can also download it via a torrent or web server if someone shares it elsewhere.")
currentClipboard := window.Clipboard()
ipfsIdentifierTitle := getLabelCentered("IPFS Content Identifier:")
ipfsIdentifierLabel := getBoldLabelCentered("Qme64v7Go941s3psokZ7aDngQR6Tdv55jDhUDdLZXsRiRh")
ipfsIdentifierCopyToClipboardButton := getWidgetCentered(widget.NewButtonWithIcon("Copy", theme.ContentCopyIcon(), func(){
fileNameTitle := getLabelCentered("File Name:")
fileNameLabel := getBoldLabelCentered("OpenSNPDataArchive.tar.gz")
fileHashTitle := getLabelCentered("File SHA256 Checksum Hash:")
fileHashLabel := getBoldLabelCentered("49f84fb71cb12df718a80c1ce25f6370ba758cbee8f24bd8a6d4f0da2e3c51ee")
fileSizeTitle := getLabelCentered("File Size:")
fileSizeLabel := getBoldLabelCentered("48,961,240 bytes (50.1 GB)")
fileExtractedSizeTitle := getLabelCentered("File Extracted Size:")
fileExtractedSizeLabel := getBoldLabelCentered("128,533,341,751 bytes (119.7 GB)")
verifyFileTitle := getBoldLabelCentered("Verify File")
verifyFileDescription1 := getLabelCentered("You can use the Seekia client to verify your downloaded file.")
verifyFileDescription2 := getLabelCentered("Press the button below and select your file.")
verifyFileDescription3 := getLabelCentered("This will take a while, because the file contents must be hashed.")
selectFileCallbackFunction := func(fyneFileObject fyne.URIReadCloser, err error){
if (err != nil){
setErrorEncounteredPage(window, err, currentPage)
if (fyneFileObject == nil){
setLoadingScreen(window, "Hashing File", "Calculating file hash...")
filePath := fyneFileObject.URI().String()
filePath = strings.TrimPrefix(filePath, "file://")
fileObject, err := os.Open(filePath)
if (err != nil){
setErrorEncounteredPage(window, err, currentPage)
defer fileObject.Close()
//TODO: Use Blake3 instead of sha256 for faster hashing
hasher := sha256.New()
_, err = io.Copy(hasher, fileObject)
if (err != nil){
setErrorEncounteredPage(window, err, currentPage)
hashResultBytes := hasher.Sum(nil)
expectedResult := "49f84fb71cb12df718a80c1ce25f6370ba758cbee8f24bd8a6d4f0da2e3c51ee"
expectedResultBytes, err := encoding.DecodeHexStringToBytes(expectedResult)
if (err != nil){
setErrorEncounteredPage(window, err, currentPage)
bytesAreEqual := bytes.Equal(hashResultBytes, expectedResultBytes)
if (bytesAreEqual == false){
title := "File Is Invalid"
dialogMessage1 := getLabelCentered("The file you downloaded is not valid.")
dialogMessage2 := getLabelCentered("The SHA256 Checksum does not match the expected checksum.")
dialogContent := container.NewVBox(dialogMessage1, dialogMessage2)
dialog.ShowCustom(title, "Close", dialogContent, window)
} else {
title := "File Is Valid"
dialogMessage1 := getLabelCentered("The file you downloaded is valid!")
dialogMessage2 := getLabelCentered("The SHA256 Checksum matches the expected checksum.")
dialogContent := container.NewVBox(dialogMessage1, dialogMessage2)
dialog.ShowCustom(title, "Close", dialogContent, window)
verifyFileButton := getWidgetCentered(widget.NewButton("Verify File", func(){
dialog.ShowFileOpen(selectFileCallbackFunction, window)
page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2, description3, description4, description5, widget.NewSeparator(), ipfsIdentifierTitle, ipfsIdentifierLabel, ipfsIdentifierCopyToClipboardButton, widget.NewSeparator(), fileNameTitle, fileNameLabel, widget.NewSeparator(), fileHashTitle, fileHashLabel, widget.NewSeparator(), fileSizeTitle, fileSizeLabel, widget.NewSeparator(), fileExtractedSizeTitle, fileExtractedSizeLabel, widget.NewSeparator(), verifyFileTitle, verifyFileDescription1, verifyFileDescription2, verifyFileDescription3, verifyFileButton)
scrollablePage := container.NewVScroll(page)
func setExtractTrainingDataPage(window fyne.Window, previousPage func()){
currentPage := func(){setExtractTrainingDataPage(window, previousPage)}
title := getBoldLabelCentered("Extract Training Data")
backButton := getBackButtonCentered(previousPage)
description1 := getLabelCentered("You must extract the downloaded OpenSNPDataArchive.tar.gz to a folder.")
description2 := getLabelCentered("Once you have extracted the file, select the extracted folder using the page below.")
currentLocationTitle := getLabelCentered("Current Folder Location:")
getCurrentLocationLabel := func()(*fyne.Container, error){
fileExists, fileContents, err := localFilesystem.GetFileContents("./OpenSNPDataArchiveFolderpath.txt")
if (err != nil) { return nil, err }
if (fileExists == false){
noneLabel := getItalicLabelCentered("None")
return noneLabel, nil
folderpathLabel := getBoldLabelCentered(string(fileContents))
return folderpathLabel, nil
currentLocationLabel, err := getCurrentLocationLabel()
if (err != nil) {
setErrorEncounteredPage(window, err, previousPage)
selectFolderCallbackFunction := func(folderObject fyne.ListableURI, err error){
if (err != nil){
title := "Failed to open folder."
dialogMessage := getLabelCentered("Report this error to Seekia developers: " + err.Error())
dialogContent := container.NewVBox(dialogMessage)
dialog.ShowCustom(title, "Close", dialogContent, window)
if (folderObject == nil) {
folderPath := folderObject.Path()
fileContents := []byte(folderPath)
err = localFilesystem.CreateOrOverwriteFile(fileContents, "./", "OpenSNPDataArchiveFolderpath.txt")
if (err != nil){
title := "Failed to save file."
dialogMessage := getLabelCentered("Report this error to Seekia developers: " + err.Error())
dialogContent := container.NewVBox(dialogMessage)
dialog.ShowCustom(title, "Close", dialogContent, window)
selectFolderLocationButton := getWidgetCentered(widget.NewButtonWithIcon("Select Folder Location", theme.FolderIcon(), func(){
dialog.ShowFolderOpen(selectFolderCallbackFunction, window)
page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2, widget.NewSeparator(), currentLocationTitle, currentLocationLabel, widget.NewSeparator(), selectFolderLocationButton)
scrollablePage := container.NewVScroll(page)
func setCreateTrainingDataPage(window fyne.Window, previousPage func()){
currentPage := func(){setCreateTrainingDataPage(window, previousPage)}
title := getBoldLabelCentered("Create Training Data")
backButton := getBackButtonCentered(previousPage)
description1 := getLabelCentered("Press the button below to begin creating the training data.")
description2 := getLabelCentered("This will prepare each user's genome into a file to use to train each neural network.")
description3 := getLabelCentered("This will take a while.")
beginCreatingButton := getWidgetCentered(widget.NewButtonWithIcon("Begin Creating Data", theme.MediaPlayIcon(), func(){
setStartAndMonitorCreateTrainingDataPage(window, currentPage)
page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2, description3, beginCreatingButton)
func setStartAndMonitorCreateTrainingDataPage(window fyne.Window, previousPage func()){
err := locusMetadata.InitializeLocusMetadataVariables()
if (err != nil){
setErrorEncounteredPage(window, err, previousPage)
title := getBoldLabelCentered("Creating Training Data")
fileExists, fileContents, err := localFilesystem.GetFileContents("./OpenSNPDataArchiveFolderpath.txt")
if (err != nil) {
setErrorEncounteredPage(window, err, previousPage)
if (fileExists == false){
backButton := getBackButtonCentered(previousPage)
description1 := getBoldLabelCentered("You have not selected your OpenSNP data archive folderpath.")
description2 := getLabelCentered("Go back to step 2 and follow the instructions.")
page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2)
dataArchiveFolderpath := string(fileContents)
progressDetailsBinding := binding.NewString()
progressPercentageBinding := binding.NewFloat()
loadingBar := getWidgetCentered(widget.NewProgressBarWithData(progressPercentageBinding))
progressDetailsTitle := getBoldLabelCentered("Progress Details:")
progressDetailsLabel := widget.NewLabelWithData(progressDetailsBinding)
progressDetailsLabel.TextStyle = fyne.TextStyle{
Bold: false,
Italic: true,
Monospace: false,
progressDetailsLabelCentered := getWidgetCentered(progressDetailsLabel)
// We set this bool to true to stop the createData process
var createDataIsStoppedBoolMutex sync.RWMutex
createDataIsStoppedBool := false
cancelButton := getWidgetCentered(widget.NewButtonWithIcon("Cancel", theme.CancelIcon(), func(){
createDataIsStoppedBool = true
page := container.NewVBox(title, widget.NewSeparator(), loadingBar, progressDetailsTitle, progressDetailsLabelCentered, widget.NewSeparator(), cancelButton)
createTrainingDataFunction := func(){
// -bool: Process completed (true == was not stopped mid-way)
// -bool: Data archive is well formed
// -error
createTrainingData := func()(bool, bool, error){
phenotypesFilepath := goFilepath.Join(dataArchiveFolderpath, "OpenSNPData", "phenotypes_202308230100.csv")
fileObject, err := os.Open(phenotypesFilepath)
if (err != nil){
fileDoesNotExist := os.IsNotExist(err)
if (fileDoesNotExist == true){
// Archive is corrupt
return true, false, nil
return false, false, err
defer fileObject.Close()
fileIsWellFormed, userPhenotypesList_OpenSNP := readBiobankData.ReadOpenSNPPhenotypesFile(fileObject)
if (fileIsWellFormed == false){
// Archive is corrupt
return true, false, nil
// This is the folderpath for the folder which contains all of the user raw genomes
openSNPRawGenomesFolderpath := goFilepath.Join(dataArchiveFolderpath, "OpenSNPData")
filesList, err := os.ReadDir(openSNPRawGenomesFolderpath)
if (err != nil) { return false, false, err }
// Map Structure: User ID -> List of user raw genome filepaths
userRawGenomeFilepathsMap := make(map[int][]string)
for _, filesystemObject := range filesList{
filepathIsFolder := filesystemObject.IsDir()
if (filepathIsFolder == true){
// Archive is corrupt
return true, false, nil
fileName := filesystemObject.Name()
// Example of a raw genome filename: "user1_file9_yearofbirth_1985_sex_XY.23andme"
userIDWithRawGenomeInfo, fileIsUserGenome := strings.CutPrefix(fileName, "user")
if (fileIsUserGenome == false){
// File is not a user genome, skip it.
userIDString, rawGenomeInfo, separatorFound := strings.Cut(userIDWithRawGenomeInfo, "_")
if (separatorFound == false){
// Archive is corrupt
return true, false, nil
userID, err := helpers.ConvertStringToInt(userIDString)
if (err != nil){
// Archive is corrupt
return true, false, nil
getFileIsReadableStatus := func()bool{
is23andMe := strings.HasSuffix(rawGenomeInfo, ".23andme.txt")
if (is23andMe == true){
// We can read this file
return true
isAncestry := strings.HasSuffix(rawGenomeInfo, ".ancestry.txt")
if (isAncestry == true){
// We can read this file
return true
// We cannot read this raw genome file
//TODO: Add ability to read more raw genome files
return false
fileIsReadable := getFileIsReadableStatus()
if (fileIsReadable == true){
rawGenomeFilepath := goFilepath.Join(openSNPRawGenomesFolderpath, fileName)
existingList, exists := userRawGenomeFilepathsMap[userID]
if (exists == false){
userRawGenomeFilepathsMap[userID] = []string{rawGenomeFilepath}
} else {
existingList = append(existingList, rawGenomeFilepath)
userRawGenomeFilepathsMap[userID] = existingList
// We create folder to store training data
_, err = localFilesystem.CreateFolder("./TrainingData")
if (err != nil) { return false, false, err }
_, err = localFilesystem.CreateFolder("./TrainingData/EyeColor")
if (err != nil) { return false, false, err }
numberOfUserPhenotypeDataObjects := len(userPhenotypesList_OpenSNP)
maximumIndex := numberOfUserPhenotypeDataObjects-1
numberOfUsersString := helpers.ConvertIntToString(numberOfUserPhenotypeDataObjects)
for index, userPhenotypeDataObject := range userPhenotypesList_OpenSNP{
trainingProgressPercentage, err := helpers.ScaleNumberProportionally(true, index, 0, maximumIndex, 0, 100)
if (err != nil) { return false, false, err }
trainingProgressFloat64 := float64(trainingProgressPercentage)/100
err = progressPercentageBinding.Set(trainingProgressFloat64)
if (err != nil) { return false, false, err }
createDataIsStopped := createDataIsStoppedBool
if (createDataIsStopped == true){
// User exited the process
return false, false, nil
userIndexString := helpers.ConvertIntToString(index + 1)
progressDetailsStatus := "Processing User " + userIndexString + "/" + numberOfUsersString
err = progressDetailsBinding.Set(progressDetailsStatus)
if (err != nil) { return false, false, err }
userID := userPhenotypeDataObject.UserID
userRawGenomeFilepathsList, exists := userRawGenomeFilepathsMap[userID]
if (exists == false){
// User has no genomes
// We read all of the user's raw genomes and combine them into a single genomeMap which excludes conflicting loci values
userRawGenomesWithMetadataList := make([]prepareRawGenomes.RawGenomeWithMetadata, 0)
for _, userRawGenomeFilepath := range userRawGenomeFilepathsList{
// -bool: Able to read raw genome file
// -bool: Genome is phased
// -map[int64]readRawGenomes.RawGenomeLocusValue
// -error
readRawGenomeMap := func()(bool, bool, map[int64]readRawGenomes.RawGenomeLocusValue, error){
fileObject, err := os.Open(userRawGenomeFilepath)
if (err != nil) { return false, false, nil, err }
defer fileObject.Close()
_, _, _, _, genomeIsPhased, rawGenomeMap, err := readRawGenomes.ReadRawGenomeFile(fileObject)
if (err != nil) {
//log.Println("Raw genome file is malformed: " + userRawGenomeFilepath + ". Reason: " + err.Error())
return false, false, nil, nil
return true, genomeIsPhased, rawGenomeMap, nil
ableToReadRawGenome, rawGenomeIsPhased, rawGenomeMap, err := readRawGenomeMap()
if (err != nil){ return false, false, err }
if (ableToReadRawGenome == false){
// We cannot read this genome file
// Many of the genome files are unreadable.
//TODO: Improve ability to read slightly corrupted genome files
newGenomeIdentifier, err := helpers.GetNewRandom16ByteArray()
if (err != nil) { return false, false, err }
rawGenomeWithMetadata := prepareRawGenomes.RawGenomeWithMetadata{
GenomeIdentifier: newGenomeIdentifier,
GenomeIsPhased: rawGenomeIsPhased,
RawGenomeMap: rawGenomeMap,
userRawGenomesWithMetadataList = append(userRawGenomesWithMetadataList, rawGenomeWithMetadata)
if (len(userRawGenomesWithMetadataList) == 0){
// None of the user's genome files are readable
getUserLociValuesMap := func()(map[int64]locusValue.LocusValue, error){
updatePercentageCompleteFunction := func(_ int)error{
return nil
genomesWithMetadataList, _, combinedGenomesExist, onlyExcludeConflictsGenomeIdentifier, _, err := prepareRawGenomes.GetGenomesWithMetadataListFromRawGenomesList(userRawGenomesWithMetadataList, updatePercentageCompleteFunction)
if (err != nil) { return nil, err }
if (combinedGenomesExist == false){
if (len(genomesWithMetadataList) != 1){
return nil, errors.New("GetGenomesWithMetadataListFromRawGenomesList returning non-1 length genomesWithMetadataList when combinedGenomesExist == false")
// Only 1 genome exists
genomeWithMetadataObject := genomesWithMetadataList[0]
genomeMap := genomeWithMetadataObject.GenomeMap
return genomeMap, nil
for _, genomeWithMetadataObject := range genomesWithMetadataList{
genomeIdentifier := genomeWithMetadataObject.GenomeIdentifier
if (genomeIdentifier == onlyExcludeConflictsGenomeIdentifier){
genomeMap := genomeWithMetadataObject.GenomeMap
return genomeMap, nil
return nil, errors.New("OnlyExcludeConflicts genome not found from GetGenomesWithMetadataListFromRawGenomesList's returned list.")
userLociValuesMap, err := getUserLociValuesMap()
if (err != nil) { return false, false, err }
//TODO: Add more traits
traitNamesList := []string{"Eye Color"}
for _, traitName := range traitNamesList{
traitNameWithoutWhitespace := strings.ReplaceAll(traitName, " ", "")
trainingDataFolderpath := goFilepath.Join("./TrainingData", traitNameWithoutWhitespace)
userDataExists, userTrainingDataList, err := geneticPrediction.CreateGeneticPredictionTrainingData_OpenSNP(traitName, userPhenotypeDataObject, userLociValuesMap)
if (err != nil) { return false, false, err }
if (userDataExists == false){
// User cannot be used for training
for index, trainingData := range userTrainingDataList{
userTrainingDataBytes, err := geneticPrediction.EncodeTrainingDataObjectToBytes(trainingData)
if (err != nil) { return false, false, err }
trainingDataIndexString := helpers.ConvertIntToString(index+1)
userIDString := helpers.ConvertIntToString(userID)
trainingDataFilename := "User" + userIDString + "_TrainingData_" + trainingDataIndexString + ".gob"
err = localFilesystem.CreateOrOverwriteFile(userTrainingDataBytes, trainingDataFolderpath, trainingDataFilename)
if (err != nil) { return false, false, err }
return true, false, nil
processIsComplete, archiveIsCorrupt, err := createTrainingData()
if (err != nil){
setErrorEncounteredPage(window, err, previousPage)
if (processIsComplete == false){
// User exited the page
if (archiveIsCorrupt == true){
title := getBoldLabelCentered("OpenSNP Archive Is Corrupt")
description1 := getBoldLabelCentered("Your downloaded OpenSNP data archive is corrupt.")
description2 := getLabelCentered("The extracted folder contents do not match what the archive should contain.")
description3 := getLabelCentered("You should re-extract the contents of the archive.")
exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), previousPage))
page := container.NewVBox(title, widget.NewSeparator(), description1, description2, description3, exitButton)
go createTrainingDataFunction()
func setCreateTrainingDataIsCompletePage(window fyne.Window){
title := getBoldLabelCentered("Creating Data Is Complete")
description1 := getLabelCentered("Creating training data is complete!")
description2 := getLabelCentered("The data have been saved in the TrainingData folder.")
exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), func(){
page := container.NewVBox(title, widget.NewSeparator(), description1, description2, exitButton)
func setTrainModelsPage(window fyne.Window, previousPage func()){
currentPage := func(){setTrainModelsPage(window, previousPage)}
title := getBoldLabelCentered("Train Models")
backButton := getBackButtonCentered(previousPage)
description1 := getLabelCentered("Press the button below to begin training the genetic models.")
description2 := getLabelCentered("This will train each neural network using the user training data.")
description3 := getLabelCentered("This will take a while.")
beginTrainingButton := getWidgetCentered(widget.NewButtonWithIcon("Begin Training Models", theme.MediaPlayIcon(), func(){
setStartAndMonitorTrainModelsPage(window, currentPage)
page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2, description3, beginTrainingButton)
func setStartAndMonitorTrainModelsPage(window fyne.Window, previousPage func()){
title := getBoldLabelCentered("Train Models")
//TODO: Verify TrainingData folder integrity
progressDetailsBinding := binding.NewString()
progressPercentageBinding := binding.NewFloat()
loadingBar := getWidgetCentered(widget.NewProgressBarWithData(progressPercentageBinding))
progressDetailsTitle := getBoldLabelCentered("Progress Details:")
progressDetailsLabel := widget.NewLabelWithData(progressDetailsBinding)
progressDetailsLabel.TextStyle = fyne.TextStyle{
Bold: false,
Italic: true,
Monospace: false,
progressDetailsLabelCentered := getWidgetCentered(progressDetailsLabel)
// We set this bool to true to stop the trainModels process
var trainModelsIsStoppedBoolMutex sync.RWMutex
trainModelsIsStoppedBool := false
cancelButton := getWidgetCentered(widget.NewButtonWithIcon("Cancel", theme.CancelIcon(), func(){
trainModelsIsStoppedBool = true
page := container.NewVBox(title, widget.NewSeparator(), loadingBar, progressDetailsTitle, progressDetailsLabelCentered, widget.NewSeparator(), cancelButton)
trainModelsFunction := func(){
// -bool: Process completed (true == was not stopped mid-way)
// -error
trainModels := func()(bool, error){
_, err := localFilesystem.CreateFolder("./TrainedModels")
if (err != nil) { return false, err }
traitNamesList := []string{"Eye Color"}
for _, traitName := range traitNamesList{
trainingSetFilepathsList, _, err := getTrainingAndTestingDataFilepathLists(traitName)
if (err != nil) { return false, err }
// We create a new neural network object to train
neuralNetworkObject, err := geneticPrediction.GetNewUntrainedNeuralNetworkObject(traitName)
if (err != nil) { return false, err }
numberOfTrainingDatas := len(trainingSetFilepathsList)
numberOfTrainingDatasString := helpers.ConvertIntToString(numberOfTrainingDatas)
finalIndex := numberOfTrainingDatas - 1
for index, filePath := range trainingSetFilepathsList{
trainModelsIsStopped := trainModelsIsStoppedBool
if (trainModelsIsStopped == true){
// User exited the process
return false, nil
fileExists, fileContents, err := localFilesystem.GetFileContents(filePath)
if (err != nil) { return false, err }
if (fileExists == false){
return false, errors.New("TrainingData file not found: " + filePath)
trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents)
if (err != nil) { return false, err }
err = geneticPrediction.TrainNeuralNetwork(traitName, neuralNetworkObject, trainingDataObject)
if (err != nil) { return false, err }
exampleIndexString := helpers.ConvertIntToString(index+1)
numberOfExamplesProgress := "Trained " + exampleIndexString + "/" + numberOfTrainingDatasString + " Examples"
percentageProgressInt, err := helpers.ScaleNumberProportionally(true, index, 0, finalIndex, 0, 100)
if (err != nil) { return false, err }
newProgressFloat64 := float64(percentageProgressInt)/100
// Network training is complete.
// We now save the neural network as a .gob file
neuralNetworkBytes, err := geneticPrediction.EncodeNeuralNetworkObjectToBytes(*neuralNetworkObject)
if (err != nil) { return false, err }
traitNameWithoutWhitespaces := strings.ReplaceAll(traitName, " ", "")
neuralNetworkFilename := traitNameWithoutWhitespaces + "Model.gob"
err = localFilesystem.CreateOrOverwriteFile(neuralNetworkBytes, "./TrainedModels/", neuralNetworkFilename)
if (err != nil) { return false, err }
return true, nil
processIsComplete, err := trainModels()
if (err != nil){
setErrorEncounteredPage(window, err, previousPage)
if (processIsComplete == false){
// User exited the page
go trainModelsFunction()
func setTrainModelsIsCompletePage(window fyne.Window){
title := getBoldLabelCentered("Training Models Is Complete")
description1 := getLabelCentered("Model training is complete!")
description2 := getLabelCentered("The models have been saved in the TrainedModels folder.")
exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), func(){
page := container.NewVBox(title, widget.NewSeparator(), description1, description2, exitButton)
func setTestModelsPage(window fyne.Window, previousPage func()){
currentPage := func(){setTestModelsPage(window, previousPage)}
title := getBoldLabelCentered("Test Models")
backButton := getBackButtonCentered(previousPage)
description1 := getLabelCentered("Press the button below to begin testing the genetic models.")
description2 := getLabelCentered("This will test each neural network using user training data examples.")
description3 := getLabelCentered("The testing data is not used to train the models.")
description4 := getLabelCentered("The results of the testing will be displayed at the end.")
beginTestingButton := getWidgetCentered(widget.NewButtonWithIcon("Begin Testing Models", theme.MediaPlayIcon(), func(){
setStartAndMonitorTestModelsPage(window, currentPage)
page := container.NewVBox(title, backButton, widget.NewSeparator(), description1, description2, description3, description4, beginTestingButton)
func setStartAndMonitorTestModelsPage(window fyne.Window, previousPage func()){
title := getBoldLabelCentered("Test Models")
progressDetailsBinding := binding.NewString()
progressPercentageBinding := binding.NewFloat()
loadingBar := getWidgetCentered(widget.NewProgressBarWithData(progressPercentageBinding))
progressDetailsTitle := getBoldLabelCentered("Progress Details:")
progressDetailsLabel := widget.NewLabelWithData(progressDetailsBinding)
progressDetailsLabel.TextStyle = fyne.TextStyle{
Bold: false,
Italic: true,
Monospace: false,
progressDetailsLabelCentered := getWidgetCentered(progressDetailsLabel)
// We set this bool to true to stop the testModels process
var testModelsIsStoppedBoolMutex sync.RWMutex
testModelsIsStoppedBool := false
cancelButton := getWidgetCentered(widget.NewButtonWithIcon("Cancel", theme.CancelIcon(), func(){
testModelsIsStoppedBool = true
page := container.NewVBox(title, widget.NewSeparator(), loadingBar, progressDetailsTitle, progressDetailsLabelCentered, widget.NewSeparator(), cancelButton)
testModelsFunction := func(){
// This map stores the accuracy for each model
// Map Structure: Trait Name -> Accuracy (A value between 0 and 1, 1 is fully accurate, 0 is fully inaccurate)
traitAverageAccuracyMap := make(map[string]float32)
// -bool: Process completed (true == was not stopped mid-way)
// -error
testModels := func()(bool, error){
traitNamesList := []string{"Eye Color"}
for _, traitName := range traitNamesList{
_, testingSetFilepathsList, err := getTrainingAndTestingDataFilepathLists(traitName)
if (err != nil) { return false, err }
traitNameWithoutWhitespaces := strings.ReplaceAll(traitName, " ", "")
// We read the trained model for this trait
modelFilename := traitNameWithoutWhitespaces + "Model.gob"
trainedModelFilepath := goFilepath.Join("./TrainedModels/", modelFilename)
fileExists, fileContents, err := localFilesystem.GetFileContents(trainedModelFilepath)
if (err != nil) { return false, err }
if (fileExists == false){
return false, errors.New("TrainedModel not found: " + trainedModelFilepath)
neuralNetworkObject, err := geneticPrediction.DecodeBytesToNeuralNetworkObject(fileContents)
if (err != nil) { return false, err }
numberOfTrainingDatas := len(testingSetFilepathsList)
numberOfTrainingDatasString := helpers.ConvertIntToString(numberOfTrainingDatas)
finalIndex := numberOfTrainingDatas - 1
// This is the sum of accuracy for each training data
accuracySum := float32(0)
for index, filePath := range testingSetFilepathsList{
testModelsIsStopped := testModelsIsStoppedBool
if (testModelsIsStopped == true){
// User exited the process
return false, nil
fileExists, fileContents, err := localFilesystem.GetFileContents(filePath)
if (err != nil) { return false, err }
if (fileExists == false){
return false, errors.New("TrainingData file not found: " + filePath)
trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents)
if (err != nil) { return false, err }
trainingDataInputLayer := trainingDataObject.InputLayer
trainingDataExpectedOutputLayer := trainingDataObject.OutputLayer
predictionLayer, err := geneticPrediction.GetNeuralNetworkRawPrediction(&neuralNetworkObject, trainingDataInputLayer)
if (err != nil) { return false, err }
numberOfPredictionNeurons := len(predictionLayer)
if (len(trainingDataExpectedOutputLayer) != numberOfPredictionNeurons){
return false, errors.New("Neural network prediction output length does not match expected output length.")
// TODO: Improve how we calculate the accuracy
// We should take into account the number of loci that were provided by the user's genome,
// and display an accuracy for each number of loci provided.
// For example, if 90% of loci values were provided, accuracy is 80%. If only 10% were provided, accuracy is 20%.
// This is the sum of the distance between the expected values and the predicted values
totalDistance := float32(0)
for index, element := range predictionLayer{
// Each element is a neuron value between 0 and 1
// We see how far away the answer is from the expected value
expectedValue := trainingDataExpectedOutputLayer[index]
distance := element - expectedValue
// We make distance positive
if (distance < 0){
distance = -distance
totalDistance += distance
averageDistance := totalDistance/float32(numberOfPredictionNeurons)
accuracy := 1 - averageDistance
accuracySum += accuracy
exampleIndexString := helpers.ConvertIntToString(index+1)
numberOfExamplesProgress := "Tested " + exampleIndexString + "/" + numberOfTrainingDatasString + " Examples"
percentageProgressInt, err := helpers.ScaleNumberProportionally(true, index, 0, finalIndex, 0, 100)
if (err != nil) { return false, err }
newProgressFloat64 := float64(percentageProgressInt)/100
averageAccuracy := accuracySum/float32(numberOfTrainingDatas)
traitAverageAccuracyMap[traitName] = averageAccuracy
// Testing is complete.
return true, nil
processIsComplete, err := testModels()
if (err != nil){
setErrorEncounteredPage(window, err, previousPage)
if (processIsComplete == false){
// User exited the page
setTestModelsIsCompletePage(window, traitAverageAccuracyMap)
go testModelsFunction()
// This function returns a list of training data and testing data filepaths for a trait.
// -[]string: Sorted list of training data filepaths
// -[]string: Unsorted list of testing data filepaths
// -error
func getTrainingAndTestingDataFilepathLists(traitName string)([]string, []string, error){
if (traitName != "Eye Color"){
return nil, nil, errors.New("getTrainingAndTestingDataFilepathLists called with invalid traitName: " + traitName)
traitNameWithoutWhitespaces := strings.ReplaceAll(traitName, " ", "")
trainingDataFolderpath := goFilepath.Join("./TrainingData/", traitNameWithoutWhitespaces)
filesList, err := os.ReadDir(trainingDataFolderpath)
if (err != nil) { return nil, nil, err }
// This stores the filepath for each training data
trainingDataFilepathsList := make([]string, 0, len(filesList))
for _, filesystemObject := range filesList{
filepathIsFolder := filesystemObject.IsDir()
if (filepathIsFolder == true){
// Folder is corrupt
return nil, nil, errors.New("Training data is corrupt for trait: " + traitName)
fileName := filesystemObject.Name()
filepath := goFilepath.Join(trainingDataFolderpath, fileName)
trainingDataFilepathsList = append(trainingDataFilepathsList, filepath)
numberOfTrainingDataFiles := len(trainingDataFilepathsList)
if (numberOfTrainingDataFiles == 0){
return nil, nil, errors.New("No training data exists for trait: " + traitName)
if ((numberOfTrainingDataFiles % 110) != 0){
// There are 110 examples for each user.
return nil, nil, errors.New(traitName + " training data has an invalid number of examples.")
getNumberOfExpectedTrainingDatas := func()(int, error){
if (traitName == "Eye Color"){
return 113190, nil
return 0, errors.New("Unknown traitName: " + traitName)
numberOfExpectedTrainingDatas, err := getNumberOfExpectedTrainingDatas()
if (err != nil){ return nil, nil, err }
if (numberOfTrainingDataFiles != numberOfExpectedTrainingDatas){
numberOfTrainingDataFilesString := helpers.ConvertIntToString(numberOfTrainingDataFiles)
return nil, nil, errors.New(traitName + " number of training datas is unexpected: " + numberOfTrainingDataFilesString)
// We sort the training data to be in a deterministically random order
// This allows us to train the neural network in the same order each time
// We do this so we can generate deterministic models which are identical byte-for-byte
// We have to set aside 200 user's training datas for testing the neural network
// We have to remove them per-user because each user has 110 training datas.
// Otherwise, we would be training and testing on data from the same users.
// We need to test with users that the models were never trained upon.
// First we extract the user identifiers from the data
userIdentifiersMap := make(map[int]struct{})
for _, trainingDataFilepath := range trainingDataFilepathsList{
// We have to extract the filename from the filepath
trainingDataFilename := goFilepath.Base(trainingDataFilepath)
// Example filepath format: "User4680_TrainingData_89.gob"
trimmedFilename := strings.TrimPrefix(trainingDataFilename, "User")
userIdentifierString, _, underscoreExists := strings.Cut(trimmedFilename, "_")
if (underscoreExists == false){
return nil, nil, errors.New("Invalid trainingData filename: " + trainingDataFilename)
userIdentifier, err := helpers.ConvertStringToInt(userIdentifierString)
if (err != nil){
return nil, nil, errors.New("Invalid trainingData filename: " + trainingDataFilename)
userIdentifiersMap[userIdentifier] = struct{}{}
userIdentifiersList := helpers.GetListOfMapKeys(userIdentifiersMap)
// We sort the user identifiers list in ascending order
// Now we deterministically randomize the order of the user identifiers list
pseudorandomNumberGenerator := mathRand.New(mathRand.NewPCG(1, 2))
pseudorandomNumberGenerator.Shuffle(len(userIdentifiersList), func(i int, j int){
userIdentifiersList[i], userIdentifiersList[j] = userIdentifiersList[j], userIdentifiersList[i]
trainingSetFilepathsList := make([]string, 0)
testingSetFilepathsList := make([]string, 0)
numberOfUsers := len(userIdentifiersList)
if (numberOfUsers < 250){
return nil, nil, errors.New("Too few training data examples for trait: " + traitName)
// We use 200 users for testing (validation), so we don't train using them
numberOfTrainingUsers := numberOfUsers - 200
for index, userIdentifier := range userIdentifiersList{
// Example filepath format: "User4680_TrainingData_89.gob"
userIdentifierString := helpers.ConvertIntToString(userIdentifier)
trainingDataFilenamePrefix := "User" + userIdentifierString + "_TrainingData_"
for k:=1; k <= 110; k++{
kString := helpers.ConvertIntToString(k)
trainingDataFilename := trainingDataFilenamePrefix + kString + ".gob"
trainingDataFilepath := goFilepath.Join(trainingDataFolderpath, trainingDataFilename)
if (index < numberOfTrainingUsers){
trainingSetFilepathsList = append(trainingSetFilepathsList, trainingDataFilepath)
} else {
testingSetFilepathsList = append(testingSetFilepathsList, trainingDataFilepath)
return trainingSetFilepathsList, testingSetFilepathsList, nil
func setTestModelsIsCompletePage(window fyne.Window, traitPredictionAccuracyMap map[string]float32){
title := getBoldLabelCentered("Testing Models Is Complete")
description1 := getLabelCentered("Model testing is complete!")
description2 := getLabelCentered("The results of the testing are below.")
getResultsGrid := func()(*fyne.Container, error){
traitNameTitle := getItalicLabelCentered("Trait Name")
predictionAccuracyTitle := getItalicLabelCentered("Prediction Accuracy")
traitNameColumn := container.NewVBox(traitNameTitle, widget.NewSeparator())
predictionAccuracyColumn := container.NewVBox(predictionAccuracyTitle, widget.NewSeparator())
traitNamesList := helpers.GetListOfMapKeys(traitPredictionAccuracyMap)
for _, traitName := range traitNamesList{
traitNameLabel := getBoldLabelCentered(traitName)
traitPredictionAccuracy, exists := traitPredictionAccuracyMap[traitName]
if (exists == false){
return nil, errors.New("traitPredictionAccuracyMap missing traitName: " + traitName)
traitPredictionAccuracyString := helpers.ConvertFloat64ToStringRounded(float64(traitPredictionAccuracy)*100, 2)
traitPredictionAccuracyFormatted := traitPredictionAccuracyString + "%"
traitPredictionAccuracyLabel := getBoldLabelCentered(traitPredictionAccuracyFormatted)
resultsGrid := container.NewHBox(layout.NewSpacer(), traitNameColumn, predictionAccuracyColumn, layout.NewSpacer())
return resultsGrid, nil
resultsGrid, err := getResultsGrid()
if (err != nil){
setErrorEncounteredPage(window, err, func(){setHomePage(window)})
exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), func(){
page := container.NewVBox(title, widget.NewSeparator(), description1, description2, exitButton, widget.NewSeparator(), resultsGrid)
// We use this to define a custom fyne theme
// We are only overriding the foreground color to pure black
type customTheme struct{
defaultTheme fyne.Theme
func getCustomFyneTheme()fyne.Theme{
standardThemeObject := theme.LightTheme()
newTheme := customTheme{
defaultTheme: standardThemeObject,
return newTheme
// This function is used to define our custom fyne themes
// It changes a few default colors, while leaving all other colors the same as the default theme
func (input customTheme)Color(colorName fyne.ThemeColorName, variant fyne.ThemeVariant)color.Color{
switch colorName{
case theme.ColorNameForeground:{
newColor := color.Black
return newColor
case theme.ColorNameSeparator:{
// This is the color used for separators
newColor := color.Black
return newColor
case theme.ColorNameInputBackground:{
// This color is used for the background of input elements such as text entries
newColor, err := imagery.GetColorObjectFromColorCode("b3b3b3")
if (err == nil){
return newColor
case theme.ColorNameButton:{
// This is the color used for buttons
newColor, err := imagery.GetColorObjectFromColorCode("d8d8d8")
if (err == nil){
return newColor
case theme.ColorNamePlaceHolder:{
// This is the color used for text
newColor, err := imagery.GetColorObjectFromColorCode("4d4d4d")
if (err == nil){
return newColor
// We will use the default color for this theme
return input.defaultTheme.Color(colorName, variant)
// Our custom themes change nothing about the default theme fonts
func (input customTheme)Font(style fyne.TextStyle)fyne.Resource{
themeFont := input.defaultTheme.Font(style)
return themeFont
// Our custom themes change nothing about the default theme icons
func (input customTheme)Icon(iconName fyne.ThemeIconName)fyne.Resource{
themeIcon := input.defaultTheme.Icon(iconName)
return themeIcon
func (input customTheme)Size(name fyne.ThemeSizeName)float32{
themeSize := input.defaultTheme.Size(name)
if (name == theme.SizeNameText){
// After fyne v2.3.0, text labels are no longer the same height as buttons
// We increase the text size so that a text label is the same height as a button
// We need to increase text size because we are creating grids by creating multiple VBoxes, and connecting them with an HBox
// If we could create grids in a different way, we could avoid having to do this
// Example: Create a new grid type: container.NewThinGrid?
// -The columns will only be as wide as the the widest element within them
// -We can add separators between each row (grid.ShowRowLines = true) or between columns (grid.ShowColumnLines = true)
// -We can add borders (grid.ShowTopBorder = true, grid.ShowBottomBorder = true, grid.ShowLeftBorder = true, grid.ShowRightBorder = true)
// Using a different grid type is the solution we need to eventually use
// Then, we can show the user an option to increase the text size globally, and all grids will still render correctly
result := themeSize * 1.08
return result
return themeSize