Added the Obesity disease to genetic analyses.
This commit is contained in:
parent
8bc2bc01f3
commit
02676dbda1
29 changed files with 408 additions and 103 deletions
|
@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log.
|
|||
|
||||
## Unversioned Changes
|
||||
|
||||
* Added the Obesity disease to genetic analyses. - *Simon Sarasova*
|
||||
* Implemented neural network prediction for polygenic diseases to replace old method. Added autism and homosexualness to genetic analyses. - *Simon Sarasova*
|
||||
* Increased the quantity of attributes that are extracted from the OpenSNP biobank data archive. - *Simon Sarasova*
|
||||
* Added numeric traits to genetic analyses. - *Simon Sarasova*
|
||||
|
|
|
@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th
|
|||
|
||||
Name | Date Of First Commit | Number Of Commits
|
||||
--- | --- | ---
|
||||
Simon Sarasova | June 13, 2023 | 278
|
||||
Simon Sarasova | June 13, 2023 | 279
|
|
@ -395,7 +395,8 @@ func initializeApplicationVariables()error{
|
|||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) { return err }
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) { return err }
|
||||
|
|
|
@ -37,9 +37,13 @@ func TestGenerateParameters(t *testing.T){
|
|||
func TestGenerateProfiles(t *testing.T){
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err := traits.InitializeTraitVariables()
|
||||
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
|
||||
}
|
||||
|
|
|
@ -25,7 +25,11 @@ func TestCreateCoupleGeneticAnalysis_SingleGenomes(t *testing.T){
|
|||
}
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
|
@ -104,7 +108,11 @@ func TestCreateCoupleGeneticAnalysis_SingleAndMultipleGenomes(t *testing.T){
|
|||
}
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
|
@ -206,7 +214,11 @@ func TestCreateCoupleGeneticAnalysis_MultipleGenomes(t *testing.T){
|
|||
}
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
|
|
|
@ -25,7 +25,11 @@ func TestCreatePersonGeneticAnalysis_SingleGenome(t *testing.T){
|
|||
}
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
|
@ -88,7 +92,11 @@ func TestCreatePersonGeneticAnalysis_MultipleGenomes(t *testing.T){
|
|||
}
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
|
|
|
@ -804,7 +804,8 @@ func GetNumericOutcomeValueFromOutputLayer(attributeName string, outputLayer []f
|
|||
return 54, 272, nil
|
||||
}
|
||||
case "Autism",
|
||||
"Homosexualness":{
|
||||
"Homosexualness",
|
||||
"Obesity":{
|
||||
return 0, 10, nil
|
||||
}
|
||||
}
|
||||
|
@ -860,6 +861,11 @@ func getNeuralNetworkLayerSizes(attributeName string)(int, int, int, int, error)
|
|||
// There is 1 output neuron, representing a homosexualness value
|
||||
return 12, 10, 5, 1, nil
|
||||
}
|
||||
case "Obesity":{
|
||||
// There are 3000 input neurons
|
||||
// There is 1 output neuron, representing an obesity value
|
||||
return 3000, 3, 2, 1, nil
|
||||
}
|
||||
}
|
||||
|
||||
return 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown attributeName: " + attributeName)
|
||||
|
@ -931,7 +937,8 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
|
||||
return traitLociList, nil
|
||||
}
|
||||
case "Autism":{
|
||||
case "Autism",
|
||||
"Obesity":{
|
||||
|
||||
diseaseObject, err := polygenicDiseases.GetPolygenicDiseaseObject(attributeName)
|
||||
if (err != nil) { return nil, err }
|
||||
|
@ -1106,6 +1113,21 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
|
|||
|
||||
outputLayer := []float32{outputValueFloat32}
|
||||
|
||||
return true, outputLayer, nil
|
||||
}
|
||||
case "Obesity":{
|
||||
|
||||
userObesityIsKnown := userPhenotypeDataObject.ObesityIsKnown
|
||||
if (userObesityIsKnown == false){
|
||||
return false, nil, nil
|
||||
}
|
||||
|
||||
userObesity := userPhenotypeDataObject.Obesity
|
||||
|
||||
outputValueFloat32 := float32(userObesity)
|
||||
|
||||
outputLayer := []float32{outputValueFloat32}
|
||||
|
||||
return true, outputLayer, nil
|
||||
}
|
||||
}
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -15,9 +15,13 @@ import "testing"
|
|||
func TestPersonSampleAnalyses(t *testing.T){
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err := traits.InitializeTraitVariables()
|
||||
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
|
||||
}
|
||||
|
@ -47,9 +51,13 @@ func TestPersonSampleAnalyses(t *testing.T){
|
|||
func TestCoupleSampleAnalyses(t *testing.T){
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err := traits.InitializeTraitVariables()
|
||||
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
|
||||
}
|
||||
|
|
|
@ -1382,14 +1382,18 @@ func TestCreateAndReadRequest_BroadcastContent(t *testing.T){
|
|||
|
||||
// We initialize these variables so we can create fake profiles
|
||||
|
||||
err := traits.InitializeTraitVariables()
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
|
||||
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err = profileFormat.InitializeProfileFormatVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("Failed to initialize profile format variables: " + err.Error())
|
||||
|
|
|
@ -326,14 +326,18 @@ func TestCreateAndReadResponse_GetProfilesInfo(t *testing.T){
|
|||
|
||||
func TestCreateAndReadResponse_GetProfiles(t *testing.T){
|
||||
|
||||
err := traits.InitializeTraitVariables()
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
|
||||
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err = profileFormat.InitializeProfileFormatVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("Failed to initialize profile format variables: " + err.Error())
|
||||
|
|
|
@ -19,7 +19,10 @@ func TestGetAttributeDisplayInfo(t *testing.T){
|
|||
t.Fatalf("InitializeGlobalSettingsDatastore failed: " + err.Error())
|
||||
}
|
||||
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
|
|
|
@ -10,11 +10,15 @@ import "seekia/internal/helpers"
|
|||
import "testing"
|
||||
import "strings"
|
||||
|
||||
|
||||
func TestProfileFormat(t *testing.T){
|
||||
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
err := traits.InitializeTraitVariables()
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
|
||||
}
|
||||
|
@ -199,7 +203,10 @@ func TestProfileGeneticReferences(t *testing.T){
|
|||
}
|
||||
}
|
||||
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList()
|
||||
if (err != nil) {
|
||||
|
|
|
@ -27,6 +27,9 @@ var predictionModel_Autism []byte
|
|||
//go:embed predictionModels/HomosexualnessModel.gob
|
||||
var predictionModel_Homosexualness []byte
|
||||
|
||||
//go:embed predictionModels/ObesityModel.gob
|
||||
var predictionModel_Obesity []byte
|
||||
|
||||
|
||||
//Outputs:
|
||||
// -bool: Model exists
|
||||
|
@ -50,6 +53,9 @@ func GetGeneticPredictionModelBytes(traitName string)(bool, []byte){
|
|||
case "Homosexualness":{
|
||||
return true, predictionModel_Homosexualness
|
||||
}
|
||||
case "Obesity":{
|
||||
return true, predictionModel_Obesity
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
|
@ -87,6 +93,9 @@ var predictionAccuracy_Autism []byte
|
|||
//go:embed predictionModelAccuracies/HomosexualnessModelAccuracy.gob
|
||||
var predictionAccuracy_Homosexualness []byte
|
||||
|
||||
//go:embed predictionModelAccuracies/ObesityModelAccuracy.gob
|
||||
var predictionAccuracy_Obesity []byte
|
||||
|
||||
|
||||
// The files returned by this function are .gob encoded geneticPrediction.NumericAttributePredictionAccuracyInfoMap objects
|
||||
func GetPredictionModelNumericAttributeAccuracyInfoBytes(attributeName string)([]byte, error){
|
||||
|
@ -101,6 +110,9 @@ func GetPredictionModelNumericAttributeAccuracyInfoBytes(attributeName string)([
|
|||
case "Homosexualness":{
|
||||
return predictionAccuracy_Homosexualness, nil
|
||||
}
|
||||
case "Obesity":{
|
||||
return predictionAccuracy_Obesity, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, errors.New("GetPredictionModelNumericAttributeAccuracyInfoBytes called with unknown attributeName: " + attributeName)
|
||||
|
|
|
@ -9,7 +9,7 @@ import "seekia/internal/genetics/geneticPrediction"
|
|||
|
||||
func TestGeneticPredictionModels(t *testing.T){
|
||||
|
||||
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism"}
|
||||
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Obesity"}
|
||||
|
||||
for _, attributeName := range attributeNamesList{
|
||||
|
||||
|
@ -43,7 +43,7 @@ func TestGeneticPredictionModelAccuracies(t *testing.T){
|
|||
}
|
||||
}
|
||||
|
||||
numericAttributeNamesList := []string{"Height", "Autism", "Homosexualness"}
|
||||
numericAttributeNamesList := []string{"Height", "Autism", "Homosexualness", "Obesity"}
|
||||
|
||||
for _, attributeName := range numericAttributeNamesList{
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -185,7 +185,10 @@ func TestGeneticReferences(t *testing.T){
|
|||
}
|
||||
}
|
||||
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil){
|
||||
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
}
|
||||
|
||||
polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList()
|
||||
if (err != nil) {
|
||||
|
|
Binary file not shown.
83
resources/geneticReferences/polygenicDiseases/obesity.go
Normal file
83
resources/geneticReferences/polygenicDiseases/obesity.go
Normal file
|
@ -0,0 +1,83 @@
|
|||
package polygenicDiseases
|
||||
|
||||
import "seekia/internal/helpers"
|
||||
|
||||
import _ "embed"
|
||||
|
||||
import "errors"
|
||||
import "encoding/gob"
|
||||
import "bytes"
|
||||
import "maps"
|
||||
|
||||
|
||||
//go:embed rsIDs/GiantObesityStudyLoci.gob
|
||||
var GiantObesityStudyLociFile []byte
|
||||
|
||||
|
||||
func getObesityDiseaseObject()(PolygenicDisease, error){
|
||||
|
||||
// Map Structure: rsID -> References Map
|
||||
locusReferencesMap := make(map[int64]map[string]string)
|
||||
|
||||
referencesMap_List1 := make(map[string]string)
|
||||
referencesMap_List1["GIANT consortium - Meta-analyses of Genome-Wide Association Studies - 2022 - Obesity"] = "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files"
|
||||
|
||||
// These SNPs are taken from the meta-analyses of Genome-Wide Association Studies (GWAS) created by the GIANT consortium
|
||||
//https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files
|
||||
|
||||
// Download link:
|
||||
// https://portals.broadinstitute.org/collaboration/giant/images/0/09/PublicRelease.WHRadjBMI.C.All.Add.txt.gz
|
||||
|
||||
// SHA-256 Checksum:
|
||||
// 2a863b0357037ae5c34853342052ed3c59735d6440da0fd15d1cab34b7d49daf
|
||||
|
||||
// See /utilities/extractGiantLoci/extractGiantLoci.go to see how they were extracted from the file
|
||||
|
||||
buffer := bytes.NewBuffer(GiantObesityStudyLociFile)
|
||||
decoder := gob.NewDecoder(buffer)
|
||||
|
||||
var lociList_1 []int64
|
||||
|
||||
err := decoder.Decode(&lociList_1)
|
||||
if (err != nil){ return PolygenicDisease{}, err }
|
||||
|
||||
for _, rsID := range lociList_1{
|
||||
locusReferencesMap[rsID] = maps.Clone(referencesMap_List1)
|
||||
}
|
||||
|
||||
obesityLociList := helpers.GetListOfMapKeys(locusReferencesMap)
|
||||
|
||||
referencesMap := make(map[string]string)
|
||||
referencesMap["Obesity Genome-Wide Association Study (GWAS) created by the GIANT consortium"] = "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files"
|
||||
|
||||
getAverageRiskProbabilitiesFunction := func(maleOrFemale string, inputAge int)(float64, error){
|
||||
|
||||
// Roughly 30% of people are obese.
|
||||
|
||||
if (maleOrFemale == "Male"){
|
||||
return 0.30, nil
|
||||
}
|
||||
|
||||
if (maleOrFemale != "Female"){
|
||||
return 0, errors.New("Trying to get breast cancer risk probability for invalid maleOrFemale: " + maleOrFemale)
|
||||
}
|
||||
|
||||
//TODO: Add different probabilities per age
|
||||
|
||||
return 0.30, nil
|
||||
}
|
||||
|
||||
obesityObject := PolygenicDisease{
|
||||
DiseaseName: "Obesity",
|
||||
EffectedSex: "Both",
|
||||
DiseaseDescription: "The condition of having an excessive amount of body fat.",
|
||||
LocusReferencesMap: locusReferencesMap,
|
||||
LociList: obesityLociList,
|
||||
GetAverageRiskProbabilitiesFunction: getAverageRiskProbabilitiesFunction,
|
||||
References: referencesMap,
|
||||
}
|
||||
|
||||
return obesityObject, nil
|
||||
}
|
||||
|
||||
|
|
@ -47,12 +47,15 @@ var polygenicDiseaseNamesList []string
|
|||
var polygenicDiseaseObjectsList []PolygenicDisease
|
||||
|
||||
// This must be called once during application startup
|
||||
func InitializePolygenicDiseaseVariables(){
|
||||
func InitializePolygenicDiseaseVariables()error{
|
||||
|
||||
breastCancerObject := getBreastCancerDiseaseObject()
|
||||
autismObject := getAutismDiseaseObject()
|
||||
|
||||
polygenicDiseaseObjectsList = []PolygenicDisease{breastCancerObject, autismObject}
|
||||
obesityObject, err := getObesityDiseaseObject()
|
||||
if (err != nil) { return err }
|
||||
|
||||
polygenicDiseaseObjectsList = []PolygenicDisease{breastCancerObject, autismObject, obesityObject}
|
||||
|
||||
polygenicDiseaseNamesList = make([]string, 0, len(polygenicDiseaseObjectsList))
|
||||
|
||||
|
@ -62,6 +65,8 @@ func InitializePolygenicDiseaseVariables(){
|
|||
|
||||
polygenicDiseaseNamesList = append(polygenicDiseaseNamesList, diseaseName)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Be aware that all of these functions are returning original objects/slices, not copies
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,2 @@
|
|||
### This folder contains files which are gob-encoded []int64
|
||||
### These int64s are rsIDs which influence various traits.
|
|
@ -48,9 +48,13 @@ import "time"
|
|||
|
||||
func main(){
|
||||
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil){
|
||||
panic(err)
|
||||
return
|
||||
}
|
||||
|
||||
err := traits.InitializeTraitVariables()
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil){
|
||||
panic(err)
|
||||
return
|
||||
|
@ -733,7 +737,7 @@ func setStartAndMonitorCreateTrainingDataPage(window fyne.Window, previousPage f
|
|||
if (err != nil) { return false, false, err }
|
||||
|
||||
//TODO: Add more attributes
|
||||
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness"}
|
||||
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"}
|
||||
|
||||
// We create the folders for each attribute's training data
|
||||
|
||||
|
@ -1003,7 +1007,7 @@ func setTrainModelsPage(window fyne.Window, previousPage func()){
|
|||
description3 := getLabelCentered("This will take a while.")
|
||||
description4 := getLabelCentered("You must select a model to train.")
|
||||
|
||||
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness"}
|
||||
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"}
|
||||
|
||||
attributeNameSelector := widget.NewSelect(attributeNamesList, nil)
|
||||
|
||||
|
@ -1269,7 +1273,8 @@ func setStartAndMonitorTrainModelPage(window fyne.Window, attributeName string,
|
|||
|
||||
case "Height",
|
||||
"Autism",
|
||||
"Homosexualness":{
|
||||
"Homosexualness",
|
||||
"Obesity":{
|
||||
return true, nil
|
||||
}
|
||||
case "Lactose Tolerance",
|
||||
|
@ -1362,7 +1367,7 @@ func setTestModelsPage(window fyne.Window, previousPage func()){
|
|||
description5 := getLabelCentered("The results will also be saved in the ModelAccuracies folder.")
|
||||
description6 := getLabelCentered("You must select a model to test.")
|
||||
|
||||
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness"}
|
||||
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"}
|
||||
|
||||
attributeNameSelector := widget.NewSelect(attributeNamesList, nil)
|
||||
|
||||
|
@ -1432,7 +1437,8 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p
|
|||
|
||||
case "Height",
|
||||
"Autism",
|
||||
"Homosexualness":{
|
||||
"Homosexualness",
|
||||
"Obesity":{
|
||||
return true, nil
|
||||
}
|
||||
case "Lactose Tolerance",
|
||||
|
@ -2341,6 +2347,9 @@ func getTrainingAndTestingDataFilepathLists(attributeName string)([]string, []st
|
|||
case "Homosexualness":{
|
||||
return 14500, nil
|
||||
}
|
||||
case "Obesity":{
|
||||
return 24009, nil
|
||||
}
|
||||
}
|
||||
|
||||
return 0, errors.New("Unknown attributeName: " + attributeName)
|
||||
|
|
|
@ -31,7 +31,12 @@ func main(){
|
|||
}
|
||||
|
||||
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
||||
polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
|
||||
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
||||
if (err != nil) {
|
||||
log.Println("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
err = traits.InitializeTraitVariables()
|
||||
if (err != nil) {
|
||||
|
|
2
utilities/extractGiantLoci/.gitignore
vendored
2
utilities/extractGiantLoci/.gitignore
vendored
|
@ -1,3 +1,5 @@
|
|||
GiantHeightStudy.txt
|
||||
GiantHeightStudyLoci.gob
|
||||
GiantObesityStudy.txt
|
||||
GiantObesityStudyLoci.gob
|
||||
NewLocusMetadata.gob
|
|
@ -5,18 +5,28 @@
|
|||
// The files are tab-delimited files of rsIDs and their effect on a particular trait
|
||||
|
||||
// The output file is a .gob encoded []int64 of the top 1000 most impactful loci on the trait.
|
||||
// These files are then saved into /resources/geneticReferences/traits/rsIDs
|
||||
// These files are then saved into the following folders:
|
||||
// -Height -> /resources/geneticReferences/traits/rsIDs
|
||||
// -Obesity -> /resources/geneticReferences/polygenicDiseases/rsIDs
|
||||
// The loci metadata for loci from these files is also imported into the locusMetadata package to enable them to be used in Seekia
|
||||
|
||||
package main
|
||||
|
||||
// Here is the file I used to extract causal rsIDs for height
|
||||
// Download link:
|
||||
// https://portals.broadinstitute.org/collaboration/giant/images/4/4e/GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_ALL.gz
|
||||
// Here are the files I used to extract causal rsIDs
|
||||
|
||||
//SHA-256 Checksum:
|
||||
// Trait: Height
|
||||
// Download Link:
|
||||
// https://portals.broadinstitute.org/collaboration/giant/images/4/4e/GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_ALL.gz
|
||||
// SHA-256 Checksum:
|
||||
// db18859724675f2f9ba86eff28cb4dacac0629c0b25c9806a6cf2eed6bb8b71e
|
||||
|
||||
// Trait: Obesity (Waist-to-hip-ratio)
|
||||
// Download Link:
|
||||
// https://portals.broadinstitute.org/collaboration/giant/images/0/09/PublicRelease.WHRadjBMI.C.All.Add.txt.gz
|
||||
// SHA-256 Checksum:
|
||||
// 2a863b0357037ae5c34853342052ed3c59735d6440da0fd15d1cab34b7d49daf
|
||||
|
||||
|
||||
import "seekia/resources/geneticReferences/locusMetadata"
|
||||
import "seekia/resources/geneticReferences/modifyLocusMetadata"
|
||||
|
||||
|
@ -40,9 +50,14 @@ func main(){
|
|||
|
||||
extractGiantLoci := func()error{
|
||||
|
||||
fileBytes, err := os.ReadFile("./GiantHeightStudy.txt")
|
||||
if (err != nil) {
|
||||
return errors.New("Could not open GiantHeightStudy.txt file: " + err.Error())
|
||||
// heightOrObesity := "Height"
|
||||
heightOrObesity := "Obesity"
|
||||
|
||||
filepath := "./Giant" + heightOrObesity + "Study.txt"
|
||||
|
||||
fileBytes, err := os.ReadFile(filepath)
|
||||
if (err != nil){
|
||||
return errors.New("Could not open " + filepath + ": " + err.Error())
|
||||
}
|
||||
|
||||
fileReader := bytes.NewReader(fileBytes)
|
||||
|
@ -51,11 +66,11 @@ func main(){
|
|||
|
||||
// We first read the header line
|
||||
|
||||
//These are the columns of the file:
|
||||
//These are the columns of the Height file:
|
||||
|
||||
// COLUMN DESCRIPTION FOR FILE NAMED GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_*.gz
|
||||
// Filename: GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_*.gz
|
||||
// - SNPID
|
||||
// -represented as CHR:POS:REF:ALT)
|
||||
// -represented as CHR:POS:REF:ALT
|
||||
// - RSID
|
||||
// -RS NUMBER, WHEN AVAILABLE
|
||||
// - CHR
|
||||
|
@ -82,6 +97,22 @@ func main(){
|
|||
// - N
|
||||
// -Total sample size used in the GWAS analysis
|
||||
|
||||
// These are the columns of the Obesity (WHR) File:
|
||||
// Filename: PublicRelease.WHRadjBMI.C.All.Add.txt.gz
|
||||
//
|
||||
// -1. snpname - dbSNP rsID
|
||||
// -2. chr - chromosome
|
||||
// -3. pos - position
|
||||
// -4. markername - chr:pos
|
||||
// -5. ref - reference allele (hg19 + strand)
|
||||
// -6. alt - alternate allele (hg19 + strand)
|
||||
// -7. beta - beta
|
||||
// -8. se - standard error
|
||||
// -9. pvalue - P value
|
||||
// -10. n - sample size
|
||||
// -11. gmaf/eur_maf - alternate allele frequency in 1000 Genome Combined/European Ancestries
|
||||
// -12. exac_maf/exac_nfe_maf -alternate allele frequency in ExAC Combined/Non-Finnish European Ancestries
|
||||
|
||||
_, err = bufioReader.ReadString('\n')
|
||||
if (err != nil) { return err }
|
||||
|
||||
|
@ -91,7 +122,7 @@ func main(){
|
|||
Effect float64
|
||||
}
|
||||
|
||||
rsidsInfoMap := make(map[int64]LocusInfo)
|
||||
lociInfoMap := make(map[int64]LocusInfo)
|
||||
|
||||
for {
|
||||
|
||||
|
@ -102,58 +133,132 @@ func main(){
|
|||
// We have reached the end of the file
|
||||
break
|
||||
}
|
||||
|
||||
// File is corrupt
|
||||
return errors.New("Error reading file: " + err.Error())
|
||||
}
|
||||
|
||||
lineElementsSlice := strings.Split(string(rsidInfoLine), "\t")
|
||||
|
||||
//Outputs:
|
||||
// -bool: Locus information is available
|
||||
// -int64: Locus rsID
|
||||
// -int: Locus Chromosome
|
||||
// -int: Locus Position
|
||||
// -float64: Locus effect
|
||||
// -error
|
||||
getLocusInfo := func()(bool, int64, int, int, float64, error){
|
||||
|
||||
if (heightOrObesity == "Height"){
|
||||
|
||||
rsidString := lineElementsSlice[1]
|
||||
rsidChromosomeString := lineElementsSlice[2]
|
||||
rsidPositionString := lineElementsSlice[3]
|
||||
rsidEffectString := lineElementsSlice[7]
|
||||
locusChromosomeString := lineElementsSlice[2]
|
||||
locusPositionString := lineElementsSlice[3]
|
||||
locusEffectString := lineElementsSlice[7]
|
||||
|
||||
rsidWithoutPrefix, prefixFound := strings.CutPrefix(rsidString, "rs")
|
||||
if (prefixFound == false){
|
||||
// Some of the rsIDs are not formatted in the "rs123456" format
|
||||
// We skip those
|
||||
//log.Println("rs prefix not found in rsID: " + rsIDString)
|
||||
continue
|
||||
// log.Println("rs prefix not found in rsID: " + rsidString)
|
||||
return false, 0, 0, 0, 0, nil
|
||||
}
|
||||
|
||||
rsID, err := helpers.ConvertStringToInt64(rsidWithoutPrefix)
|
||||
if (err != nil){
|
||||
return errors.New("RSID is invalid: " + err.Error())
|
||||
return false, 0, 0, 0, 0, errors.New("RSID is invalid: " + err.Error())
|
||||
}
|
||||
|
||||
rsidChromosome, err := helpers.ConvertStringToInt(rsidChromosomeString)
|
||||
if (err != nil){ return err }
|
||||
locusChromosome, err := helpers.ConvertStringToInt(locusChromosomeString)
|
||||
if (err != nil){
|
||||
return false, 0, 0, 0, 0, errors.New("Locus Chromosome is invalid: " + err.Error())
|
||||
}
|
||||
|
||||
rsidPosition, err := helpers.ConvertStringToInt(rsidPositionString)
|
||||
if (err != nil){ return err }
|
||||
locusPosition, err := helpers.ConvertStringToInt(locusPositionString)
|
||||
if (err != nil){
|
||||
return false, 0, 0, 0, 0, errors.New("Locus Position is invalid: " + err.Error())
|
||||
}
|
||||
|
||||
rsidEffectRaw, err := helpers.ConvertStringToFloat64(rsidEffectString)
|
||||
locusEffectRaw, err := helpers.ConvertStringToFloat64(locusEffectString)
|
||||
if (err != nil) {
|
||||
if (rsidEffectString == ""){
|
||||
if (locusEffectString == ""){
|
||||
// The database has at least 1 entry with no effect provided
|
||||
continue
|
||||
return false, 0, 0, 0, 0, nil
|
||||
}
|
||||
return err
|
||||
return false, 0, 0, 0, 0, errors.New("RSID effect is invalid: " + err.Error())
|
||||
}
|
||||
|
||||
return true, rsID, locusChromosome, locusPosition, locusEffectRaw, nil
|
||||
}
|
||||
|
||||
rsidString := lineElementsSlice[0]
|
||||
locusChromosomeString := lineElementsSlice[1]
|
||||
locusPositionString := lineElementsSlice[2]
|
||||
locusEffectString := lineElementsSlice[6]
|
||||
|
||||
if (rsidString == "-" || rsidString == ""){
|
||||
return false, 0, 0, 0, 0, nil
|
||||
}
|
||||
|
||||
rsidWithoutPrefix, prefixFound := strings.CutPrefix(rsidString, "rs")
|
||||
if (prefixFound == false){
|
||||
return false, 0, 0, 0, 0, errors.New("Obesity GWAS file contains invalid rsID: " + rsidString)
|
||||
}
|
||||
|
||||
rsID, err := helpers.ConvertStringToInt64(rsidWithoutPrefix)
|
||||
if (err != nil){
|
||||
return false, 0, 0, 0, 0, errors.New("RSID is invalid: " + err.Error())
|
||||
}
|
||||
|
||||
locusChromosome, err := helpers.ConvertStringToInt(locusChromosomeString)
|
||||
if (err != nil){
|
||||
|
||||
if (locusChromosomeString == "X"){
|
||||
// TODO: Add the ability to read these chromosomes
|
||||
return false, 0, 0, 0, 0, nil
|
||||
}
|
||||
|
||||
return false, 0, 0, 0, 0, errors.New("Locus Chromosome is invalid: " + err.Error())
|
||||
}
|
||||
|
||||
locusPosition, err := helpers.ConvertStringToInt(locusPositionString)
|
||||
if (err != nil){
|
||||
|
||||
hasSuffix := strings.HasSuffix(locusPositionString, "+08")
|
||||
if (hasSuffix == true){
|
||||
// This is an invalid entry in the file
|
||||
return false, 0, 0, 0, 0, nil
|
||||
}
|
||||
return false, 0, 0, 0, 0, errors.New("Locus Position is invalid: " + err.Error())
|
||||
}
|
||||
|
||||
locusEffectRaw, err := helpers.ConvertStringToFloat64(locusEffectString)
|
||||
if (err != nil) {
|
||||
return false, 0, 0, 0, 0, errors.New("RSID effect is invalid: " + err.Error())
|
||||
}
|
||||
|
||||
return true, rsID, locusChromosome, locusPosition, locusEffectRaw, nil
|
||||
}
|
||||
|
||||
locusInfoExists, locusRSID, locusChromosome, locusPosition, locusEffectRaw, err := getLocusInfo()
|
||||
if (err != nil) { return err }
|
||||
if (locusInfoExists == false){
|
||||
continue
|
||||
}
|
||||
|
||||
// Effect can be negative, we make it positive
|
||||
rsidEffect := math.Abs(rsidEffectRaw)
|
||||
locusEffect := math.Abs(locusEffectRaw)
|
||||
|
||||
existingLocusValue, exists := rsidsInfoMap[rsID]
|
||||
existingLocusValue, exists := lociInfoMap[locusRSID]
|
||||
if (exists == false){
|
||||
|
||||
newLocusInfo := LocusInfo{
|
||||
Chromosome: rsidChromosome,
|
||||
Position: rsidPosition,
|
||||
Effect: rsidEffect,
|
||||
Chromosome: locusChromosome,
|
||||
Position: locusPosition,
|
||||
Effect: locusEffect,
|
||||
}
|
||||
|
||||
rsidsInfoMap[rsID] = newLocusInfo
|
||||
lociInfoMap[locusRSID] = newLocusInfo
|
||||
} else {
|
||||
|
||||
// We see if the effect of this allele is greater
|
||||
|
@ -165,24 +270,24 @@ func main(){
|
|||
existingPosition := existingLocusValue.Position
|
||||
existingEffect := existingLocusValue.Effect
|
||||
|
||||
if (existingChromosome != rsidChromosome){
|
||||
if (existingChromosome != locusChromosome){
|
||||
return errors.New("GIANT gwas contains two rsIDs with conflicting chromosomes.")
|
||||
}
|
||||
if (existingPosition != rsidPosition){
|
||||
if (existingPosition != locusPosition){
|
||||
return errors.New("GIANT gwas contains two rsIDs with conflicting positions.")
|
||||
}
|
||||
if (existingEffect < rsidEffect){
|
||||
if (existingEffect < locusEffect){
|
||||
|
||||
// We update the value with the new effect
|
||||
existingLocusValue.Effect = rsidEffect
|
||||
rsidsInfoMap[rsID] = existingLocusValue
|
||||
existingLocusValue.Effect = locusEffect
|
||||
lociInfoMap[locusRSID] = existingLocusValue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We find the top 10,000 rsIDs with the greatest effect
|
||||
|
||||
rsidsList := helpers.GetListOfMapKeys(rsidsInfoMap)
|
||||
rsidsList := helpers.GetListOfMapKeys(lociInfoMap)
|
||||
|
||||
compareFunction := func(rsid1 int64, rsid2 int64)int{
|
||||
|
||||
|
@ -190,14 +295,14 @@ func main(){
|
|||
panic("Identical rsIDs found during sort.")
|
||||
}
|
||||
|
||||
rsid1Info, exists := rsidsInfoMap[rsid1]
|
||||
rsid1Info, exists := lociInfoMap[rsid1]
|
||||
if (exists == false){
|
||||
panic("rsid1 is missing from rsidsInfoMap.")
|
||||
panic("rsid1 is missing from lociInfoMap.")
|
||||
}
|
||||
|
||||
rsid2Info, exists := rsidsInfoMap[rsid2]
|
||||
rsid2Info, exists := lociInfoMap[rsid2]
|
||||
if (exists == false){
|
||||
panic("rsid2 is missing from rsidsInfoMap.")
|
||||
panic("rsid2 is missing from lociInfoMap.")
|
||||
}
|
||||
|
||||
rsid1Effect := rsid1Info.Effect
|
||||
|
@ -225,17 +330,16 @@ func main(){
|
|||
|
||||
for _, rsID := range mostImpactfulLoci{
|
||||
|
||||
locusInfo, exists := rsidsInfoMap[rsID]
|
||||
locusInfo, exists := lociInfoMap[rsID]
|
||||
if (exists == false){
|
||||
return errors.New("rsidsInfoMap missing rsID.")
|
||||
return errors.New("lociInfoMap missing rsID.")
|
||||
}
|
||||
|
||||
locusChromosome := locusInfo.Chromosome
|
||||
locusPosition := locusInfo.Position
|
||||
|
||||
locusReferencesMap := map[string]string{
|
||||
"Height Genome-Wide Association Study (GWAS) created by the GIANT consortium": "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files",
|
||||
}
|
||||
locusReferencesMap := make(map[string]string)
|
||||
locusReferencesMap[heightOrObesity + " Genome-Wide Association Study (GWAS) created by the GIANT consortium"] = "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files"
|
||||
|
||||
newLocusMetadata := locusMetadata.LocusMetadata{
|
||||
RSIDsList: []int64{rsID},
|
||||
|
@ -251,12 +355,16 @@ func main(){
|
|||
locusMetadatasToAddList = append(locusMetadatasToAddList, newLocusMetadata)
|
||||
}
|
||||
|
||||
// We add the locus metadatas
|
||||
|
||||
_, newLocusMetadataFileBytes, err := modifyLocusMetadata.AddLocusMetadata(locusMetadatasToAddList)
|
||||
if (err != nil) { return err }
|
||||
|
||||
err = localFilesystem.CreateOrOverwriteFile(newLocusMetadataFileBytes, "./", "NewLocusMetadata.gob")
|
||||
if (err != nil){ return err }
|
||||
|
||||
// We create the rsIDs list file
|
||||
|
||||
buffer := new(bytes.Buffer)
|
||||
|
||||
gobEncoder := gob.NewEncoder(buffer)
|
||||
|
@ -266,7 +374,9 @@ func main(){
|
|||
|
||||
encodedBytes := buffer.Bytes()
|
||||
|
||||
err = localFilesystem.CreateOrOverwriteFile(encodedBytes, "./", "GiantHeightStudyLoci.gob")
|
||||
filename := "Giant" + heightOrObesity + "StudyLoci.gob"
|
||||
|
||||
err = localFilesystem.CreateOrOverwriteFile(encodedBytes, "./", filename)
|
||||
if (err != nil){ return err }
|
||||
|
||||
return nil
|
||||
|
|
Loading…
Reference in a new issue