diff --git a/Changelog.md b/Changelog.md index 58988ca..4a4dde7 100644 --- a/Changelog.md +++ b/Changelog.md @@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log. ## Unversioned Changes +* Added the Obesity disease to genetic analyses. - *Simon Sarasova* * Implemented neural network prediction for polygenic diseases to replace old method. Added autism and homosexualness to genetic analyses. - *Simon Sarasova* * Increased the quantity of attributes that are extracted from the OpenSNP biobank data archive. - *Simon Sarasova* * Added numeric traits to genetic analyses. - *Simon Sarasova* diff --git a/Contributors.md b/Contributors.md index 4630e50..7a851f0 100644 --- a/Contributors.md +++ b/Contributors.md @@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th Name | Date Of First Commit | Number Of Commits --- | --- | --- -Simon Sarasova | June 13, 2023 | 278 \ No newline at end of file +Simon Sarasova | June 13, 2023 | 279 \ No newline at end of file diff --git a/internal/appUsers/appUsers.go b/internal/appUsers/appUsers.go index c69d3e9..8e8c38c 100644 --- a/internal/appUsers/appUsers.go +++ b/internal/appUsers/appUsers.go @@ -395,7 +395,8 @@ func initializeApplicationVariables()error{ monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() + err = polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { return err } err = traits.InitializeTraitVariables() if (err != nil) { return err } diff --git a/internal/generate/generate_test.go b/internal/generate/generate_test.go index 30d29e6..ee4e768 100644 --- a/internal/generate/generate_test.go +++ b/internal/generate/generate_test.go @@ -37,9 +37,13 @@ func TestGenerateParameters(t *testing.T){ func TestGenerateProfiles(t *testing.T){ monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() - err := 
traits.InitializeTraitVariables() + err := polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } + + err = traits.InitializeTraitVariables() if (err != nil) { t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } diff --git a/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go b/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go index bc6852c..abd3c84 100644 --- a/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go +++ b/internal/genetics/createCoupleGeneticAnalysis/createCoupleGeneticAnalysis_test.go @@ -25,7 +25,11 @@ func TestCreateCoupleGeneticAnalysis_SingleGenomes(t *testing.T){ } monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() + + err = polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } err = traits.InitializeTraitVariables() if (err != nil) { @@ -104,7 +108,11 @@ func TestCreateCoupleGeneticAnalysis_SingleAndMultipleGenomes(t *testing.T){ } monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() + + err = polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } err = traits.InitializeTraitVariables() if (err != nil) { @@ -206,7 +214,11 @@ func TestCreateCoupleGeneticAnalysis_MultipleGenomes(t *testing.T){ } monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() + + err = polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } err = traits.InitializeTraitVariables() if (err != nil) { diff --git 
a/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go b/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go index 408ff7f..90d699a 100644 --- a/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go +++ b/internal/genetics/createPersonGeneticAnalysis/createPersonGeneticAnalysis_test.go @@ -25,7 +25,11 @@ func TestCreatePersonGeneticAnalysis_SingleGenome(t *testing.T){ } monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() + + err = polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } err = traits.InitializeTraitVariables() if (err != nil) { @@ -88,7 +92,11 @@ func TestCreatePersonGeneticAnalysis_MultipleGenomes(t *testing.T){ } monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() + + err = polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } err = traits.InitializeTraitVariables() if (err != nil) { diff --git a/internal/genetics/geneticPrediction/geneticPrediction.go b/internal/genetics/geneticPrediction/geneticPrediction.go index 858657a..a75e4a5 100644 --- a/internal/genetics/geneticPrediction/geneticPrediction.go +++ b/internal/genetics/geneticPrediction/geneticPrediction.go @@ -804,7 +804,8 @@ func GetNumericOutcomeValueFromOutputLayer(attributeName string, outputLayer []f return 54, 272, nil } case "Autism", - "Homosexualness":{ + "Homosexualness", + "Obesity":{ return 0, 10, nil } } @@ -860,6 +861,11 @@ func getNeuralNetworkLayerSizes(attributeName string)(int, int, int, int, error) // There is 1 output neuron, representing a homosexualness value return 12, 10, 5, 1, nil } + case "Obesity":{ + // There are 3000 input neurons + // There is 1 output neuron, 
representing an obesity value + return 3000, 3, 2, 1, nil + } } return 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown attributeName: " + attributeName) @@ -931,7 +937,8 @@ func CreateGeneticPredictionTrainingData_OpenSNP( return traitLociList, nil } - case "Autism":{ + case "Autism", + "Obesity":{ diseaseObject, err := polygenicDiseases.GetPolygenicDiseaseObject(attributeName) if (err != nil) { return nil, err } @@ -1106,6 +1113,21 @@ func CreateGeneticPredictionTrainingData_OpenSNP( outputLayer := []float32{outputValueFloat32} + return true, outputLayer, nil + } + case "Obesity":{ + + userObesityIsKnown := userPhenotypeDataObject.ObesityIsKnown + if (userObesityIsKnown == false){ + return false, nil, nil + } + + userObesity := userPhenotypeDataObject.Obesity + + outputValueFloat32 := float32(userObesity) + + outputLayer := []float32{outputValueFloat32} + return true, outputLayer, nil } } diff --git a/internal/genetics/sampleAnalyses/SampleCoupleAnalysis.messagepack b/internal/genetics/sampleAnalyses/SampleCoupleAnalysis.messagepack index 52b1029..a46ea62 100644 Binary files a/internal/genetics/sampleAnalyses/SampleCoupleAnalysis.messagepack and b/internal/genetics/sampleAnalyses/SampleCoupleAnalysis.messagepack differ diff --git a/internal/genetics/sampleAnalyses/SamplePerson1Analysis.messagepack b/internal/genetics/sampleAnalyses/SamplePerson1Analysis.messagepack index f4135b6..70e2b07 100644 Binary files a/internal/genetics/sampleAnalyses/SamplePerson1Analysis.messagepack and b/internal/genetics/sampleAnalyses/SamplePerson1Analysis.messagepack differ diff --git a/internal/genetics/sampleAnalyses/SamplePerson2Analysis.messagepack b/internal/genetics/sampleAnalyses/SamplePerson2Analysis.messagepack index 1cb3aff..09d79e7 100644 Binary files a/internal/genetics/sampleAnalyses/SamplePerson2Analysis.messagepack and b/internal/genetics/sampleAnalyses/SamplePerson2Analysis.messagepack differ diff --git 
a/internal/genetics/sampleAnalyses/sampleAnalyses_test.go b/internal/genetics/sampleAnalyses/sampleAnalyses_test.go index d65c430..6f6a2df 100644 --- a/internal/genetics/sampleAnalyses/sampleAnalyses_test.go +++ b/internal/genetics/sampleAnalyses/sampleAnalyses_test.go @@ -15,9 +15,13 @@ import "testing" func TestPersonSampleAnalyses(t *testing.T){ monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() - err := traits.InitializeTraitVariables() + err := polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } + + err = traits.InitializeTraitVariables() if (err != nil) { t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } @@ -47,9 +51,13 @@ func TestPersonSampleAnalyses(t *testing.T){ func TestCoupleSampleAnalyses(t *testing.T){ monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() - err := traits.InitializeTraitVariables() + err := polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } + + err = traits.InitializeTraitVariables() if (err != nil) { t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } diff --git a/internal/network/serverRequest/serverRequest_test.go b/internal/network/serverRequest/serverRequest_test.go index 8376e82..b6960ff 100644 --- a/internal/network/serverRequest/serverRequest_test.go +++ b/internal/network/serverRequest/serverRequest_test.go @@ -1382,14 +1382,18 @@ func TestCreateAndReadRequest_BroadcastContent(t *testing.T){ // We initialize these variables so we can create fake profiles - err := traits.InitializeTraitVariables() + monogenicDiseases.InitializeMonogenicDiseaseVariables() + + err := polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + 
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } + + err = traits.InitializeTraitVariables() if (err != nil) { t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } - monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() - err = profileFormat.InitializeProfileFormatVariables() if (err != nil) { t.Fatalf("Failed to initialize profile format variables: " + err.Error()) diff --git a/internal/network/serverResponse/serverResponse_test.go b/internal/network/serverResponse/serverResponse_test.go index 407ef36..ce5c493 100644 --- a/internal/network/serverResponse/serverResponse_test.go +++ b/internal/network/serverResponse/serverResponse_test.go @@ -326,14 +326,18 @@ func TestCreateAndReadResponse_GetProfilesInfo(t *testing.T){ func TestCreateAndReadResponse_GetProfiles(t *testing.T){ - err := traits.InitializeTraitVariables() + monogenicDiseases.InitializeMonogenicDiseaseVariables() + + err := polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } + + err = traits.InitializeTraitVariables() if (err != nil) { t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } - monogenicDiseases.InitializeMonogenicDiseaseVariables() - polygenicDiseases.InitializePolygenicDiseaseVariables() - err = profileFormat.InitializeProfileFormatVariables() if (err != nil) { t.Fatalf("Failed to initialize profile format variables: " + err.Error()) diff --git a/internal/profiles/attributeDisplay/attributeDisplay_test.go b/internal/profiles/attributeDisplay/attributeDisplay_test.go index d548430..8d1a09d 100644 --- a/internal/profiles/attributeDisplay/attributeDisplay_test.go +++ b/internal/profiles/attributeDisplay/attributeDisplay_test.go @@ -19,7 +19,10 @@ func TestGetAttributeDisplayInfo(t *testing.T){ t.Fatalf("InitializeGlobalSettingsDatastore failed: " + err.Error()) } - 
polygenicDiseases.InitializePolygenicDiseaseVariables() + err = polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } err = traits.InitializeTraitVariables() if (err != nil) { diff --git a/internal/profiles/profileFormat/profileFormat_test.go b/internal/profiles/profileFormat/profileFormat_test.go index a47f024..807e063 100644 --- a/internal/profiles/profileFormat/profileFormat_test.go +++ b/internal/profiles/profileFormat/profileFormat_test.go @@ -10,11 +10,15 @@ import "seekia/internal/helpers" import "testing" import "strings" + func TestProfileFormat(t *testing.T){ - polygenicDiseases.InitializePolygenicDiseaseVariables() + err := polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } - err := traits.InitializeTraitVariables() + err = traits.InitializeTraitVariables() if (err != nil) { t.Fatalf("InitializeTraitVariables failed: " + err.Error()) } @@ -199,7 +203,10 @@ func TestProfileGeneticReferences(t *testing.T){ } } - polygenicDiseases.InitializePolygenicDiseaseVariables() + err = polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList() if (err != nil) { diff --git a/resources/geneticPredictionModels/geneticPredictionModels.go b/resources/geneticPredictionModels/geneticPredictionModels.go index d2fdf03..7a4edd0 100644 --- a/resources/geneticPredictionModels/geneticPredictionModels.go +++ b/resources/geneticPredictionModels/geneticPredictionModels.go @@ -27,6 +27,9 @@ var predictionModel_Autism []byte //go:embed predictionModels/HomosexualnessModel.gob var predictionModel_Homosexualness []byte +//go:embed predictionModels/ObesityModel.gob +var predictionModel_Obesity []byte + //Outputs: 
// -bool: Model exists @@ -50,6 +53,9 @@ func GetGeneticPredictionModelBytes(traitName string)(bool, []byte){ case "Homosexualness":{ return true, predictionModel_Homosexualness } + case "Obesity":{ + return true, predictionModel_Obesity + } } return false, nil @@ -87,6 +93,9 @@ var predictionAccuracy_Autism []byte //go:embed predictionModelAccuracies/HomosexualnessModelAccuracy.gob var predictionAccuracy_Homosexualness []byte +//go:embed predictionModelAccuracies/ObesityModelAccuracy.gob +var predictionAccuracy_Obesity []byte + // The files returned by this function are .gob encoded geneticPrediction.NumericAttributePredictionAccuracyInfoMap objects func GetPredictionModelNumericAttributeAccuracyInfoBytes(attributeName string)([]byte, error){ @@ -101,6 +110,9 @@ func GetPredictionModelNumericAttributeAccuracyInfoBytes(attributeName string)([ case "Homosexualness":{ return predictionAccuracy_Homosexualness, nil } + case "Obesity":{ + return predictionAccuracy_Obesity, nil + } } return nil, errors.New("GetPredictionModelNumericAttributeAccuracyInfoBytes called with unknown attributeName: " + attributeName) diff --git a/resources/geneticPredictionModels/geneticPredictionModels_test.go b/resources/geneticPredictionModels/geneticPredictionModels_test.go index bccc1f8..b969850 100644 --- a/resources/geneticPredictionModels/geneticPredictionModels_test.go +++ b/resources/geneticPredictionModels/geneticPredictionModels_test.go @@ -9,7 +9,7 @@ import "seekia/internal/genetics/geneticPrediction" func TestGeneticPredictionModels(t *testing.T){ - attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism"} + attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Obesity"} for _, attributeName := range attributeNamesList{ @@ -43,7 +43,7 @@ func TestGeneticPredictionModelAccuracies(t *testing.T){ } } - numericAttributeNamesList := []string{"Height", "Autism", "Homosexualness"} + numericAttributeNamesList := 
[]string{"Height", "Autism", "Homosexualness", "Obesity"} for _, attributeName := range numericAttributeNamesList{ diff --git a/resources/geneticPredictionModels/predictionModelAccuracies/ObesityModelAccuracy.gob b/resources/geneticPredictionModels/predictionModelAccuracies/ObesityModelAccuracy.gob new file mode 100644 index 0000000..7563ded Binary files /dev/null and b/resources/geneticPredictionModels/predictionModelAccuracies/ObesityModelAccuracy.gob differ diff --git a/resources/geneticPredictionModels/predictionModels/ObesityModel.gob b/resources/geneticPredictionModels/predictionModels/ObesityModel.gob new file mode 100644 index 0000000..fc4ccca Binary files /dev/null and b/resources/geneticPredictionModels/predictionModels/ObesityModel.gob differ diff --git a/resources/geneticReferences/geneticReferences_test.go b/resources/geneticReferences/geneticReferences_test.go index 4ca39d3..1666efd 100644 --- a/resources/geneticReferences/geneticReferences_test.go +++ b/resources/geneticReferences/geneticReferences_test.go @@ -185,7 +185,10 @@ func TestGeneticReferences(t *testing.T){ } } - polygenicDiseases.InitializePolygenicDiseaseVariables() + err = polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil){ + t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error()) + } polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList() if (err != nil) { diff --git a/resources/geneticReferences/locusMetadata/LocusMetadata.gob b/resources/geneticReferences/locusMetadata/LocusMetadata.gob index c81c03b..2ffbb1b 100644 Binary files a/resources/geneticReferences/locusMetadata/LocusMetadata.gob and b/resources/geneticReferences/locusMetadata/LocusMetadata.gob differ diff --git a/resources/geneticReferences/polygenicDiseases/obesity.go b/resources/geneticReferences/polygenicDiseases/obesity.go new file mode 100644 index 0000000..cf6fee2 --- /dev/null +++ b/resources/geneticReferences/polygenicDiseases/obesity.go @@ -0,0 
+1,85 @@ +package polygenicDiseases + +import "seekia/internal/helpers" + +import _ "embed" + +import "errors" +import "encoding/gob" +import "bytes" +import "maps" + + +//go:embed rsIDs/GiantObesityStudyLoci.gob +var GiantObesityStudyLociFile []byte + + +// getObesityDiseaseObject decodes the embedded gob-encoded []int64 of GIANT obesity rsIDs and builds the Obesity PolygenicDisease object from them. +// It returns an error if the embedded loci file cannot be decoded. +func getObesityDiseaseObject()(PolygenicDisease, error){ + + // Map Structure: rsID -> References Map + locusReferencesMap := make(map[int64]map[string]string) + + referencesMap_List1 := make(map[string]string) + referencesMap_List1["GIANT consortium - Meta-analyses of Genome-Wide Association Studies - 2022 - Obesity"] = "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files" + + // These SNPs are taken from the meta-analyses of Genome-Wide Association Studies (GWAS) created by the GIANT consortium + //https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files + + // Download link: + // https://portals.broadinstitute.org/collaboration/giant/images/0/09/PublicRelease.WHRadjBMI.C.All.Add.txt.gz + + // SHA-256 Checksum: + // 2a863b0357037ae5c34853342052ed3c59735d6440da0fd15d1cab34b7d49daf + + // See /utilities/extractGiantLoci/extractGiantLoci.go to see how they were extracted from the file + + buffer := bytes.NewBuffer(GiantObesityStudyLociFile) + decoder := gob.NewDecoder(buffer) + + var lociList_1 []int64 + + err := decoder.Decode(&lociList_1) + if (err != nil){ return PolygenicDisease{}, err } + + for _, rsID := range lociList_1{ + locusReferencesMap[rsID] = maps.Clone(referencesMap_List1) + } + + obesityLociList := helpers.GetListOfMapKeys(locusReferencesMap) + + referencesMap := make(map[string]string) + referencesMap["Obesity Genome-Wide Association Study (GWAS) created by the GIANT consortium"] = "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files" + + getAverageRiskProbabilitiesFunction := func(maleOrFemale string, inputAge int)(float64, error){ + + // Roughly 30% of people are obese. 
+ + if (maleOrFemale == "Male"){ + return 0.30, nil + } + + if (maleOrFemale != "Female"){ + return 0, errors.New("Trying to get obesity risk probability for invalid maleOrFemale: " + maleOrFemale) + } + + //TODO: Add different probabilities per age + + return 0.30, nil + } + + obesityObject := PolygenicDisease{ + DiseaseName: "Obesity", + EffectedSex: "Both", + DiseaseDescription: "The condition of having an excessive amount of body fat.", + LocusReferencesMap: locusReferencesMap, + LociList: obesityLociList, + GetAverageRiskProbabilitiesFunction: getAverageRiskProbabilitiesFunction, + References: referencesMap, + } + + return obesityObject, nil +} + + diff --git a/resources/geneticReferences/polygenicDiseases/polygenicDiseases.go b/resources/geneticReferences/polygenicDiseases/polygenicDiseases.go index bef6697..1564cb3 100644 --- a/resources/geneticReferences/polygenicDiseases/polygenicDiseases.go +++ b/resources/geneticReferences/polygenicDiseases/polygenicDiseases.go @@ -47,12 +47,15 @@ var polygenicDiseaseNamesList []string var polygenicDiseaseObjectsList []PolygenicDisease // This must be called once during application startup -func InitializePolygenicDiseaseVariables(){ +func InitializePolygenicDiseaseVariables()error{ breastCancerObject := getBreastCancerDiseaseObject() autismObject := getAutismDiseaseObject() - polygenicDiseaseObjectsList = []PolygenicDisease{breastCancerObject, autismObject} + obesityObject, err := getObesityDiseaseObject() + if (err != nil) { return err } + + polygenicDiseaseObjectsList = []PolygenicDisease{breastCancerObject, autismObject, obesityObject} polygenicDiseaseNamesList = make([]string, 0, len(polygenicDiseaseObjectsList)) @@ -62,6 +65,8 @@ func InitializePolygenicDiseaseVariables(){ polygenicDiseaseNamesList = append(polygenicDiseaseNamesList, diseaseName) } + + return nil } // Be aware that all of these functions are returning original objects/slices, not copies diff --git 
a/resources/geneticReferences/polygenicDiseases/rsIDs/GiantObesityStudyLoci.gob b/resources/geneticReferences/polygenicDiseases/rsIDs/GiantObesityStudyLoci.gob new file mode 100644 index 0000000..9cde1a2 Binary files /dev/null and b/resources/geneticReferences/polygenicDiseases/rsIDs/GiantObesityStudyLoci.gob differ diff --git a/resources/geneticReferences/polygenicDiseases/rsIDs/ReadMe.md b/resources/geneticReferences/polygenicDiseases/rsIDs/ReadMe.md new file mode 100644 index 0000000..6c08b11 --- /dev/null +++ b/resources/geneticReferences/polygenicDiseases/rsIDs/ReadMe.md @@ -0,0 +1,2 @@ +### This folder contains files which are gob-encoded []int64 +### These int64s are rsIDs which influence various traits. \ No newline at end of file diff --git a/utilities/createGeneticModels/createGeneticModels.go b/utilities/createGeneticModels/createGeneticModels.go index b93f3b8..a0d32da 100644 --- a/utilities/createGeneticModels/createGeneticModels.go +++ b/utilities/createGeneticModels/createGeneticModels.go @@ -48,9 +48,13 @@ import "time" func main(){ - polygenicDiseases.InitializePolygenicDiseaseVariables() + err := polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil){ + panic(err) + return + } - err := traits.InitializeTraitVariables() + err = traits.InitializeTraitVariables() if (err != nil){ panic(err) return @@ -733,7 +737,7 @@ func setStartAndMonitorCreateTrainingDataPage(window fyne.Window, previousPage f if (err != nil) { return false, false, err } //TODO: Add more attributes - attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness"} + attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"} // We create the folders for each attribute's training data @@ -1003,7 +1007,7 @@ func setTrainModelsPage(window fyne.Window, previousPage func()){ description3 := getLabelCentered("This will take a while.") description4 := getLabelCentered("You must 
select a model to train.") - attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness"} + attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"} attributeNameSelector := widget.NewSelect(attributeNamesList, nil) @@ -1269,7 +1273,8 @@ func setStartAndMonitorTrainModelPage(window fyne.Window, attributeName string, case "Height", "Autism", - "Homosexualness":{ + "Homosexualness", + "Obesity":{ return true, nil } case "Lactose Tolerance", @@ -1362,7 +1367,7 @@ func setTestModelsPage(window fyne.Window, previousPage func()){ description5 := getLabelCentered("The results will also be saved in the ModelAccuracies folder.") description6 := getLabelCentered("You must select a model to test.") - attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness"} + attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"} attributeNameSelector := widget.NewSelect(attributeNamesList, nil) @@ -1432,7 +1437,8 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p case "Height", "Autism", - "Homosexualness":{ + "Homosexualness", + "Obesity":{ return true, nil } case "Lactose Tolerance", @@ -2341,6 +2347,9 @@ func getTrainingAndTestingDataFilepathLists(attributeName string)([]string, []st case "Homosexualness":{ return 14500, nil } + case "Obesity":{ + return 24009, nil + } } return 0, errors.New("Unknown attributeName: " + attributeName) diff --git a/utilities/createSampleGeneticAnalyses/createSampleGeneticAnalyses.go b/utilities/createSampleGeneticAnalyses/createSampleGeneticAnalyses.go index 722b456..64f3da0 100644 --- a/utilities/createSampleGeneticAnalyses/createSampleGeneticAnalyses.go +++ b/utilities/createSampleGeneticAnalyses/createSampleGeneticAnalyses.go @@ -31,7 +31,12 @@ func main(){ } monogenicDiseases.InitializeMonogenicDiseaseVariables() - 
polygenicDiseases.InitializePolygenicDiseaseVariables() + + err = polygenicDiseases.InitializePolygenicDiseaseVariables() + if (err != nil) { + log.Println("InitializePolygenicDiseaseVariables failed: " + err.Error()) + return + } err = traits.InitializeTraitVariables() if (err != nil) { diff --git a/utilities/extractGiantLoci/.gitignore b/utilities/extractGiantLoci/.gitignore index 45e3271..8d3df25 100644 --- a/utilities/extractGiantLoci/.gitignore +++ b/utilities/extractGiantLoci/.gitignore @@ -1,3 +1,5 @@ GiantHeightStudy.txt GiantHeightStudyLoci.gob +GiantObesityStudy.txt +GiantObesityStudyLoci.gob NewLocusMetadata.gob \ No newline at end of file diff --git a/utilities/extractGiantLoci/extractGiantLoci.go b/utilities/extractGiantLoci/extractGiantLoci.go index 4884908..9d99b82 100644 --- a/utilities/extractGiantLoci/extractGiantLoci.go +++ b/utilities/extractGiantLoci/extractGiantLoci.go @@ -5,18 +5,28 @@ // The files are a tab-delimeted file of rsIDs and their effect on a particular trait // The output file is a .gob encoded []int64 of the top 1000 most impactful loci on the trait. 
-// These files are then saved into /resources/geneticReferences/traits/rsIDs +// These files are then saved into the following folders: +// -Height -> /resources/geneticReferences/traits/rsIDs +// -Obesity -> /resources/geneticReferences/polygenicDiseases/rsIDs // The loci metadata for loci from these files is also imported into the locusMetadata package to enable them to be used in Seekia package main -// Here is the file I used to extract causal rsIDs for height -// Download link: -// https://portals.broadinstitute.org/collaboration/giant/images/4/4e/GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_ALL.gz +// Here are the files I used to extract causal rsIDs -//SHA-256 Checksum: +// Trait: Height +// Download Link: +// https://portals.broadinstitute.org/collaboration/giant/images/4/4e/GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_ALL.gz +// SHA-256 Checksum: // db18859724675f2f9ba86eff28cb4dacac0629c0b25c9806a6cf2eed6bb8b71e +// Trait: Obesity (Waist-to-hip-ratio) +// Download Link: +// https://portals.broadinstitute.org/collaboration/giant/images/0/09/PublicRelease.WHRadjBMI.C.All.Add.txt.gz +// SHA-256 Checksum: +// 2a863b0357037ae5c34853342052ed3c59735d6440da0fd15d1cab34b7d49daf + + import "seekia/resources/geneticReferences/locusMetadata" import "seekia/resources/geneticReferences/modifyLocusMetadata" @@ -40,9 +50,14 @@ func main(){ extractGiantLoci := func()error{ - fileBytes, err := os.ReadFile("./GiantHeightStudy.txt") - if (err != nil) { - return errors.New("Could not open GiantHeightStudy.txt file: " + err.Error()) +// heightOrObesity := "Height" + heightOrObesity := "Obesity" + + filepath := "./Giant" + heightOrObesity + "Study.txt" + + fileBytes, err := os.ReadFile(filepath) + if (err != nil){ + return errors.New("Could not open " + filepath + ": " + err.Error()) } fileReader := bytes.NewReader(fileBytes) @@ -51,11 +66,11 @@ func main(){ // We first read the header line - //These are the columns of the file: + //These are the columns of the Height file: - // 
COLUMN DESCRIPTION FOR FILE NAMED GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_*.gz + // Filename: GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_*.gz // - SNPID - // -represented as CHR:POS:REF:ALT) + // -represented as CHR:POS:REF:ALT // - RSID // -RS NUMBER, WHEN AVAILABLE // - CHR @@ -82,6 +97,22 @@ func main(){ // - N // -Total sample size used in the GWAS analysis + // These are the columns of the Obesity (WHR) File: + // Filename: PublicRelease.WHRadjBMI.C.All.Add.txt.gz + // + // -1. snpname - dbSNP rsID + // -2. chr - chromosome + // -3. pos - position + // -4. markername - chr:pos + // -5. ref - reference allele (hg19 + strand) + // -6. alt - alternate allele (hg19 + strand) + // -7. beta - beta + // -8. se - standard error + // -9. pvalue - P value + // -10. n - sample size + // -11. gmaf/eur_maf - alternate allele frequency in 1000 Genome Combined/European Ancestries + // -12. exac_maf/exac_nfe_maf -alternate allele frequency in ExAC Combined/Non-Finnish European Ancestries + _, err = bufioReader.ReadString('\n') if (err != nil) { return err } @@ -91,7 +122,7 @@ func main(){ Effect float64 } - rsidsInfoMap := make(map[int64]LocusInfo) + lociInfoMap := make(map[int64]LocusInfo) for { @@ -102,58 +133,132 @@ func main(){ // We have reached the end of the file break } + // File is corrupt return errors.New("Error reading file: " + err.Error()) } lineElementsSlice := strings.Split(string(rsidInfoLine), "\t") - rsidString := lineElementsSlice[1] - rsidChromosomeString := lineElementsSlice[2] - rsidPositionString := lineElementsSlice[3] - rsidEffectString := lineElementsSlice[7] + //Outputs: + // -bool: Locus information is available + // -int64: Locus rsID + // -int: Locus Chromosome + // -int: Locus Position + // -float64: Locus effect + // -error + getLocusInfo := func()(bool, int64, int, int, float64, error){ - rsidWithoutPrefix, prefixFound := strings.CutPrefix(rsidString, "rs") - if (prefixFound == false){ - // Some of the rsIDs are not formatted in the 
"rs123456" format - // We skip those - //log.Println("rs prefix not found in rsID: " + rsIDString) + if (heightOrObesity == "Height"){ + + rsidString := lineElementsSlice[1] + locusChromosomeString := lineElementsSlice[2] + locusPositionString := lineElementsSlice[3] + locusEffectString := lineElementsSlice[7] + + rsidWithoutPrefix, prefixFound := strings.CutPrefix(rsidString, "rs") + if (prefixFound == false){ + // Some of the rsIDs are not formatted in the "rs123456" format + // We skip those + // log.Println("rs prefix not found in rsID: " + rsidString) + return false, 0, 0, 0, 0, nil + } + + rsID, err := helpers.ConvertStringToInt64(rsidWithoutPrefix) + if (err != nil){ + return false, 0, 0, 0, 0, errors.New("RSID is invalid: " + err.Error()) + } + + locusChromosome, err := helpers.ConvertStringToInt(locusChromosomeString) + if (err != nil){ + return false, 0, 0, 0, 0, errors.New("Locus Chromosome is invalid: " + err.Error()) + } + + locusPosition, err := helpers.ConvertStringToInt(locusPositionString) + if (err != nil){ + return false, 0, 0, 0, 0, errors.New("Locus Position is invalid: " + err.Error()) + } + + locusEffectRaw, err := helpers.ConvertStringToFloat64(locusEffectString) + if (err != nil) { + if (locusEffectString == ""){ + // The database has at least 1 entry with no effect provided + return false, 0, 0, 0, 0, nil + } + return false, 0, 0, 0, 0, errors.New("RSID effect is invalid: " + err.Error()) + } + + return true, rsID, locusChromosome, locusPosition, locusEffectRaw, nil + } + + rsidString := lineElementsSlice[0] + locusChromosomeString := lineElementsSlice[1] + locusPositionString := lineElementsSlice[2] + locusEffectString := lineElementsSlice[6] + + if (rsidString == "-" || rsidString == ""){ + return false, 0, 0, 0, 0, nil + } + + rsidWithoutPrefix, prefixFound := strings.CutPrefix(rsidString, "rs") + if (prefixFound == false){ + return false, 0, 0, 0, 0, errors.New("Obesity GWAS file contains invalid rsID: " + rsidString) + } + + rsID, err 
:= helpers.ConvertStringToInt64(rsidWithoutPrefix) + if (err != nil){ + return false, 0, 0, 0, 0, errors.New("RSID is invalid: " + err.Error()) + } + + locusChromosome, err := helpers.ConvertStringToInt(locusChromosomeString) + if (err != nil){ + + if (locusChromosomeString == "X"){ + // TODO: Add the ability to read these chromosomes + return false, 0, 0, 0, 0, nil + } + + return false, 0, 0, 0, 0, errors.New("Locus Chromosome is invalid: " + err.Error()) + } + + locusPosition, err := helpers.ConvertStringToInt(locusPositionString) + if (err != nil){ + + hasSuffix := strings.HasSuffix(locusPositionString, "+08") + if (hasSuffix == true){ + // This is an invalid entry in the file + return false, 0, 0, 0, 0, nil + } + return false, 0, 0, 0, 0, errors.New("Locus Position is invalid: " + err.Error()) + } + + locusEffectRaw, err := helpers.ConvertStringToFloat64(locusEffectString) + if (err != nil) { + return false, 0, 0, 0, 0, errors.New("RSID effect is invalid: " + err.Error()) + } + + return true, rsID, locusChromosome, locusPosition, locusEffectRaw, nil + } + + locusInfoExists, locusRSID, locusChromosome, locusPosition, locusEffectRaw, err := getLocusInfo() + if (err != nil) { return err } + if (locusInfoExists == false){ continue } - rsID, err := helpers.ConvertStringToInt64(rsidWithoutPrefix) - if (err != nil){ - return errors.New("RSID is invalid: " + err.Error()) - } - - rsidChromosome, err := helpers.ConvertStringToInt(rsidChromosomeString) - if (err != nil){ return err } - - rsidPosition, err := helpers.ConvertStringToInt(rsidPositionString) - if (err != nil){ return err } - - rsidEffectRaw, err := helpers.ConvertStringToFloat64(rsidEffectString) - if (err != nil) { - if (rsidEffectString == ""){ - // The database has at least 1 entry with no effect provided - continue - } - return err - } - // Effect can be negative, we make it positive - rsidEffect := math.Abs(rsidEffectRaw) + locusEffect := math.Abs(locusEffectRaw) - existingLocusValue, exists := 
rsidsInfoMap[rsID] + existingLocusValue, exists := lociInfoMap[locusRSID] if (exists == false){ newLocusInfo := LocusInfo{ - Chromosome: rsidChromosome, - Position: rsidPosition, - Effect: rsidEffect, + Chromosome: locusChromosome, + Position: locusPosition, + Effect: locusEffect, } - rsidsInfoMap[rsID] = newLocusInfo + lociInfoMap[locusRSID] = newLocusInfo } else { // We see if the effect of this allele is greater @@ -165,24 +270,24 @@ func main(){ existingPosition := existingLocusValue.Position existingEffect := existingLocusValue.Effect - if (existingChromosome != rsidChromosome){ + if (existingChromosome != locusChromosome){ return errors.New("GIANT gwas contains two rsIDs with conflicting chromosomes.") } - if (existingPosition != rsidPosition){ + if (existingPosition != locusPosition){ return errors.New("GIANT gwas contains two rsIDs with conflicting positions.") } - if (existingEffect < rsidEffect){ + if (existingEffect < locusEffect){ // We update the value with the new effect - existingLocusValue.Effect = rsidEffect - rsidsInfoMap[rsID] = existingLocusValue + existingLocusValue.Effect = locusEffect + lociInfoMap[locusRSID] = existingLocusValue } } } // We find the top 10,000 rsIDs with the greatest effect - rsidsList := helpers.GetListOfMapKeys(rsidsInfoMap) + rsidsList := helpers.GetListOfMapKeys(lociInfoMap) compareFunction := func(rsid1 int64, rsid2 int64)int{ @@ -190,14 +295,14 @@ func main(){ panic("Identical rsIDs found during sort.") } - rsid1Info, exists := rsidsInfoMap[rsid1] + rsid1Info, exists := lociInfoMap[rsid1] if (exists == false){ - panic("rsid1 is missing from rsidsInfoMap.") + panic("rsid1 is missing from lociInfoMap.") } - rsid2Info, exists := rsidsInfoMap[rsid2] + rsid2Info, exists := lociInfoMap[rsid2] if (exists == false){ - panic("rsid2 is missing from rsidsInfoMap.") + panic("rsid2 is missing from lociInfoMap.") } rsid1Effect := rsid1Info.Effect @@ -225,17 +330,16 @@ func main(){ for _, rsID := range mostImpactfulLoci{ - locusInfo, 
exists := rsidsInfoMap[rsID] + locusInfo, exists := lociInfoMap[rsID] if (exists == false){ - return errors.New("rsidsInfoMap missing rsID.") + return errors.New("lociInfoMap missing rsID.") } locusChromosome := locusInfo.Chromosome locusPosition := locusInfo.Position - locusReferencesMap := map[string]string{ - "Height Genome-Wide Association Study (GWAS) created by the GIANT consortium": "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files", - } + locusReferencesMap := make(map[string]string) + locusReferencesMap[heightOrObesity + " Genome-Wide Association Study (GWAS) created by the GIANT consortium"] = "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files" newLocusMetadata := locusMetadata.LocusMetadata{ RSIDsList: []int64{rsID}, @@ -251,12 +355,16 @@ func main(){ locusMetadatasToAddList = append(locusMetadatasToAddList, newLocusMetadata) } + // We add the locus metadatas + _, newLocusMetadataFileBytes, err := modifyLocusMetadata.AddLocusMetadata(locusMetadatasToAddList) if (err != nil) { return err } err = localFilesystem.CreateOrOverwriteFile(newLocusMetadataFileBytes, "./", "NewLocusMetadata.gob") if (err != nil){ return err } + // We create the rsIDs list file + buffer := new(bytes.Buffer) gobEncoder := gob.NewEncoder(buffer) @@ -266,7 +374,9 @@ func main(){ encodedBytes := buffer.Bytes() - err = localFilesystem.CreateOrOverwriteFile(encodedBytes, "./", "GiantHeightStudyLoci.gob") + filename := "Giant" + heightOrObesity + "StudyLoci.gob" + + err = localFilesystem.CreateOrOverwriteFile(encodedBytes, "./", filename) if (err != nil){ return err } return nil