Added the Obesity disease to genetic analyses.

This commit is contained in:
Simon Sarasova 2024-08-14 03:37:18 +00:00
parent 8bc2bc01f3
commit 02676dbda1
No known key found for this signature in database
GPG key ID: EEDA4103C9C36944
29 changed files with 408 additions and 103 deletions

View file

@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log.
## Unversioned Changes
* Added the Obesity disease to genetic analyses. - *Simon Sarasova*
* Implemented neural network prediction for polygenic diseases to replace old method. Added autism and homosexualness to genetic analyses. - *Simon Sarasova*
* Increased the quantity of attributes that are extracted from the OpenSNP biobank data archive. - *Simon Sarasova*
* Added numeric traits to genetic analyses. - *Simon Sarasova*

View file

@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th
Name | Date Of First Commit | Number Of Commits
--- | --- | ---
Simon Sarasova | June 13, 2023 | 278
Simon Sarasova | June 13, 2023 | 279

View file

@ -395,7 +395,8 @@ func initializeApplicationVariables()error{
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) { return err }
err = traits.InitializeTraitVariables()
if (err != nil) { return err }

View file

@ -37,9 +37,13 @@ func TestGenerateParameters(t *testing.T){
func TestGenerateProfiles(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err := traits.InitializeTraitVariables()
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}

View file

@ -25,7 +25,11 @@ func TestCreateCoupleGeneticAnalysis_SingleGenomes(t *testing.T){
}
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {
@ -104,7 +108,11 @@ func TestCreateCoupleGeneticAnalysis_SingleAndMultipleGenomes(t *testing.T){
}
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {
@ -206,7 +214,11 @@ func TestCreateCoupleGeneticAnalysis_MultipleGenomes(t *testing.T){
}
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {

View file

@ -25,7 +25,11 @@ func TestCreatePersonGeneticAnalysis_SingleGenome(t *testing.T){
}
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {
@ -88,7 +92,11 @@ func TestCreatePersonGeneticAnalysis_MultipleGenomes(t *testing.T){
}
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {

View file

@ -804,7 +804,8 @@ func GetNumericOutcomeValueFromOutputLayer(attributeName string, outputLayer []f
return 54, 272, nil
}
case "Autism",
"Homosexualness":{
"Homosexualness",
"Obesity":{
return 0, 10, nil
}
}
@ -860,6 +861,11 @@ func getNeuralNetworkLayerSizes(attributeName string)(int, int, int, int, error)
// There is 1 output neuron, representing a homosexualness value
return 12, 10, 5, 1, nil
}
case "Obesity":{
// There are 3000 input neurons
// There is 1 output neuron, representing an obesity value
return 3000, 3, 2, 1, nil
}
}
return 0, 0, 0, 0, errors.New("getNeuralNetworkLayerSizes called with unknown attributeName: " + attributeName)
@ -931,7 +937,8 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
return traitLociList, nil
}
case "Autism":{
case "Autism",
"Obesity":{
diseaseObject, err := polygenicDiseases.GetPolygenicDiseaseObject(attributeName)
if (err != nil) { return nil, err }
@ -1106,6 +1113,21 @@ func CreateGeneticPredictionTrainingData_OpenSNP(
outputLayer := []float32{outputValueFloat32}
return true, outputLayer, nil
}
case "Obesity":{
userObesityIsKnown := userPhenotypeDataObject.ObesityIsKnown
if (userObesityIsKnown == false){
return false, nil, nil
}
userObesity := userPhenotypeDataObject.Obesity
outputValueFloat32 := float32(userObesity)
outputLayer := []float32{outputValueFloat32}
return true, outputLayer, nil
}
}

View file

@ -15,9 +15,13 @@ import "testing"
func TestPersonSampleAnalyses(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err := traits.InitializeTraitVariables()
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
@ -47,9 +51,13 @@ func TestPersonSampleAnalyses(t *testing.T){
func TestCoupleSampleAnalyses(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err := traits.InitializeTraitVariables()
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}

View file

@ -1382,14 +1382,18 @@ func TestCreateAndReadRequest_BroadcastContent(t *testing.T){
// We initialize these variables so we can create fake profiles
err := traits.InitializeTraitVariables()
monogenicDiseases.InitializeMonogenicDiseaseVariables()
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = profileFormat.InitializeProfileFormatVariables()
if (err != nil) {
t.Fatalf("Failed to initialize profile format variables: " + err.Error())

View file

@ -326,14 +326,18 @@ func TestCreateAndReadResponse_GetProfilesInfo(t *testing.T){
func TestCreateAndReadResponse_GetProfiles(t *testing.T){
err := traits.InitializeTraitVariables()
monogenicDiseases.InitializeMonogenicDiseaseVariables()
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = profileFormat.InitializeProfileFormatVariables()
if (err != nil) {
t.Fatalf("Failed to initialize profile format variables: " + err.Error())

View file

@ -19,7 +19,10 @@ func TestGetAttributeDisplayInfo(t *testing.T){
t.Fatalf("InitializeGlobalSettingsDatastore failed: " + err.Error())
}
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err = traits.InitializeTraitVariables()
if (err != nil) {

View file

@ -10,11 +10,15 @@ import "seekia/internal/helpers"
import "testing"
import "strings"
func TestProfileFormat(t *testing.T){
polygenicDiseases.InitializePolygenicDiseaseVariables()
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
err := traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
@ -199,7 +203,10 @@ func TestProfileGeneticReferences(t *testing.T){
}
}
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList()
if (err != nil) {

View file

@ -27,6 +27,9 @@ var predictionModel_Autism []byte
//go:embed predictionModels/HomosexualnessModel.gob
var predictionModel_Homosexualness []byte
//go:embed predictionModels/ObesityModel.gob
var predictionModel_Obesity []byte
//Outputs:
// -bool: Model exists
@ -50,6 +53,9 @@ func GetGeneticPredictionModelBytes(traitName string)(bool, []byte){
case "Homosexualness":{
return true, predictionModel_Homosexualness
}
case "Obesity":{
return true, predictionModel_Obesity
}
}
return false, nil
@ -87,6 +93,9 @@ var predictionAccuracy_Autism []byte
//go:embed predictionModelAccuracies/HomosexualnessModelAccuracy.gob
var predictionAccuracy_Homosexualness []byte
//go:embed predictionModelAccuracies/ObesityModelAccuracy.gob
var predictionAccuracy_Obesity []byte
// The files returned by this function are .gob encoded geneticPrediction.NumericAttributePredictionAccuracyInfoMap objects
func GetPredictionModelNumericAttributeAccuracyInfoBytes(attributeName string)([]byte, error){
@ -101,6 +110,9 @@ func GetPredictionModelNumericAttributeAccuracyInfoBytes(attributeName string)([
case "Homosexualness":{
return predictionAccuracy_Homosexualness, nil
}
case "Obesity":{
return predictionAccuracy_Obesity, nil
}
}
return nil, errors.New("GetPredictionModelNumericAttributeAccuracyInfoBytes called with unknown attributeName: " + attributeName)

View file

@ -9,7 +9,7 @@ import "seekia/internal/genetics/geneticPrediction"
func TestGeneticPredictionModels(t *testing.T){
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism"}
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Obesity"}
for _, attributeName := range attributeNamesList{
@ -43,7 +43,7 @@ func TestGeneticPredictionModelAccuracies(t *testing.T){
}
}
numericAttributeNamesList := []string{"Height", "Autism", "Homosexualness"}
numericAttributeNamesList := []string{"Height", "Autism", "Homosexualness", "Obesity"}
for _, attributeName := range numericAttributeNamesList{

View file

@ -185,7 +185,10 @@ func TestGeneticReferences(t *testing.T){
}
}
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil){
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
}
polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList()
if (err != nil) {

View file

@ -0,0 +1,83 @@
package polygenicDiseases
import "seekia/internal/helpers"
import _ "embed"
import "errors"
import "encoding/gob"
import "bytes"
import "maps"
//go:embed rsIDs/GiantObesityStudyLoci.gob
var GiantObesityStudyLociFile []byte
// getObesityDiseaseObject builds the PolygenicDisease object for the Obesity disease.
//
// The disease's loci are read from the embedded GiantObesityStudyLociFile, which is a
// gob-encoded []int64 of rsIDs. Every locus is given its own copy of the GIANT consortium
// references map.
//
//Outputs:
//	-PolygenicDisease: The Obesity disease object
//	-error: Non-nil if the embedded loci file cannot be decoded
func getObesityDiseaseObject()(PolygenicDisease, error){

	// These SNPs are taken from the meta-analyses of Genome-Wide Association Studies (GWAS) created by the GIANT consortium
	// https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files
	// Download link:
	//	https://portals.broadinstitute.org/collaboration/giant/images/0/09/PublicRelease.WHRadjBMI.C.All.Add.txt.gz
	// SHA-256 Checksum:
	//	2a863b0357037ae5c34853342052ed3c59735d6440da0fd15d1cab34b7d49daf
	// See /utilities/extractGiantLoci/extractGiantLoci.go to see how they were extracted from the file

	buffer := bytes.NewBuffer(GiantObesityStudyLociFile)
	decoder := gob.NewDecoder(buffer)

	var lociList_1 []int64

	err := decoder.Decode(&lociList_1)
	if (err != nil){
		// We add context, because this error surfaces during application startup
		return PolygenicDisease{}, errors.New("getObesityDiseaseObject failed to decode GiantObesityStudyLociFile: " + err.Error())
	}

	referencesMap_List1 := make(map[string]string)
	referencesMap_List1["GIANT consortium - Meta-analyses of Genome-Wide Association Studies - 2022 - Obesity"] = "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files"

	// Map Structure: rsID -> References Map
	// The map is sized up front, because the number of loci is known after decoding
	locusReferencesMap := make(map[int64]map[string]string, len(lociList_1))

	for _, rsID := range lociList_1{

		// Each locus receives its own clone so the maps do not alias each other
		locusReferencesMap[rsID] = maps.Clone(referencesMap_List1)
	}

	// Taking the keys of the map removes any duplicate rsIDs from the decoded list
	obesityLociList := helpers.GetListOfMapKeys(locusReferencesMap)

	referencesMap := make(map[string]string)
	referencesMap["Obesity Genome-Wide Association Study (GWAS) created by the GIANT consortium"] = "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files"

	// Returns the average probability of having the disease for the provided sex and age
	// The age is currently ignored
	getAverageRiskProbabilitiesFunction := func(maleOrFemale string, inputAge int)(float64, error){

		if (maleOrFemale != "Male" && maleOrFemale != "Female"){
			return 0, errors.New("Trying to get obesity risk probability for invalid maleOrFemale: " + maleOrFemale)
		}

		// Roughly 30% of people are obese.
		//TODO: Add different probabilities per age

		return 0.30, nil
	}

	obesityObject := PolygenicDisease{
		DiseaseName: "Obesity",
		EffectedSex: "Both",
		DiseaseDescription: "The condition of having an excessive amount of body fat.",
		LocusReferencesMap: locusReferencesMap,
		LociList: obesityLociList,
		GetAverageRiskProbabilitiesFunction: getAverageRiskProbabilitiesFunction,
		References: referencesMap,
	}

	return obesityObject, nil
}

View file

@ -47,12 +47,15 @@ var polygenicDiseaseNamesList []string
var polygenicDiseaseObjectsList []PolygenicDisease
// This must be called once during application startup
func InitializePolygenicDiseaseVariables(){
func InitializePolygenicDiseaseVariables()error{
breastCancerObject := getBreastCancerDiseaseObject()
autismObject := getAutismDiseaseObject()
polygenicDiseaseObjectsList = []PolygenicDisease{breastCancerObject, autismObject}
obesityObject, err := getObesityDiseaseObject()
if (err != nil) { return err }
polygenicDiseaseObjectsList = []PolygenicDisease{breastCancerObject, autismObject, obesityObject}
polygenicDiseaseNamesList = make([]string, 0, len(polygenicDiseaseObjectsList))
@ -62,6 +65,8 @@ func InitializePolygenicDiseaseVariables(){
polygenicDiseaseNamesList = append(polygenicDiseaseNamesList, diseaseName)
}
return nil
}
// Be aware that all of these functions are returning original objects/slices, not copies

View file

@ -0,0 +1,2 @@
### This folder contains files which are gob-encoded []int64
### These int64s are rsIDs which influence various traits.

View file

@ -48,9 +48,13 @@ import "time"
func main(){
polygenicDiseases.InitializePolygenicDiseaseVariables()
err := polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil){
panic(err)
return
}
err := traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil){
panic(err)
return
@ -733,7 +737,7 @@ func setStartAndMonitorCreateTrainingDataPage(window fyne.Window, previousPage f
if (err != nil) { return false, false, err }
//TODO: Add more attributes
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness"}
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"}
// We create the folders for each attribute's training data
@ -1003,7 +1007,7 @@ func setTrainModelsPage(window fyne.Window, previousPage func()){
description3 := getLabelCentered("This will take a while.")
description4 := getLabelCentered("You must select a model to train.")
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness"}
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"}
attributeNameSelector := widget.NewSelect(attributeNamesList, nil)
@ -1269,7 +1273,8 @@ func setStartAndMonitorTrainModelPage(window fyne.Window, attributeName string,
case "Height",
"Autism",
"Homosexualness":{
"Homosexualness",
"Obesity":{
return true, nil
}
case "Lactose Tolerance",
@ -1362,7 +1367,7 @@ func setTestModelsPage(window fyne.Window, previousPage func()){
description5 := getLabelCentered("The results will also be saved in the ModelAccuracies folder.")
description6 := getLabelCentered("You must select a model to test.")
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness"}
attributeNamesList := []string{"Eye Color", "Lactose Tolerance", "Height", "Autism", "Homosexualness", "Obesity"}
attributeNameSelector := widget.NewSelect(attributeNamesList, nil)
@ -1432,7 +1437,8 @@ func setStartAndMonitorTestModelPage(window fyne.Window, attributeName string, p
case "Height",
"Autism",
"Homosexualness":{
"Homosexualness",
"Obesity":{
return true, nil
}
case "Lactose Tolerance",
@ -2341,6 +2347,9 @@ func getTrainingAndTestingDataFilepathLists(attributeName string)([]string, []st
case "Homosexualness":{
return 14500, nil
}
case "Obesity":{
return 24009, nil
}
}
return 0, errors.New("Unknown attributeName: " + attributeName)

View file

@ -31,7 +31,12 @@ func main(){
}
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
if (err != nil) {
log.Println("InitializePolygenicDiseaseVariables failed: " + err.Error())
return
}
err = traits.InitializeTraitVariables()
if (err != nil) {

View file

@ -1,3 +1,5 @@
GiantHeightStudy.txt
GiantHeightStudyLoci.gob
GiantObesityStudy.txt
GiantObesityStudyLoci.gob
NewLocusMetadata.gob

View file

@ -5,18 +5,28 @@
// Each input file is a tab-delimited file of rsIDs and their effect on a particular trait
// The output file is a .gob encoded []int64 of the top 1000 most impactful loci on the trait.
// These files are then saved into /resources/geneticReferences/traits/rsIDs
// These files are then saved into the following folders:
// -Height -> /resources/geneticReferences/traits/rsIDs
// -Obesity -> /resources/geneticReferences/polygenicDiseases/rsIDs
// The loci metadata for loci from these files is also imported into the locusMetadata package to enable them to be used in Seekia
package main
// Here is the file I used to extract causal rsIDs for height
// Download link:
// https://portals.broadinstitute.org/collaboration/giant/images/4/4e/GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_ALL.gz
// Here are the files I used to extract causal rsIDs
//SHA-256 Checksum:
// Trait: Height
// Download Link:
// https://portals.broadinstitute.org/collaboration/giant/images/4/4e/GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_ALL.gz
// SHA-256 Checksum:
// db18859724675f2f9ba86eff28cb4dacac0629c0b25c9806a6cf2eed6bb8b71e
// Trait: Obesity (Waist-to-hip-ratio)
// Download Link:
// https://portals.broadinstitute.org/collaboration/giant/images/0/09/PublicRelease.WHRadjBMI.C.All.Add.txt.gz
// SHA-256 Checksum:
// 2a863b0357037ae5c34853342052ed3c59735d6440da0fd15d1cab34b7d49daf
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/resources/geneticReferences/modifyLocusMetadata"
@ -40,9 +50,14 @@ func main(){
extractGiantLoci := func()error{
fileBytes, err := os.ReadFile("./GiantHeightStudy.txt")
if (err != nil) {
return errors.New("Could not open GiantHeightStudy.txt file: " + err.Error())
// heightOrObesity := "Height"
heightOrObesity := "Obesity"
filepath := "./Giant" + heightOrObesity + "Study.txt"
fileBytes, err := os.ReadFile(filepath)
if (err != nil){
return errors.New("Could not open " + filepath + ": " + err.Error())
}
fileReader := bytes.NewReader(fileBytes)
@ -51,11 +66,11 @@ func main(){
// We first read the header line
//These are the columns of the file:
//These are the columns of the Height file:
// COLUMN DESCRIPTION FOR FILE NAMED GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_*.gz
// Filename: GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_*.gz
// - SNPID
// -represented as CHR:POS:REF:ALT)
// -represented as CHR:POS:REF:ALT
// - RSID
// -RS NUMBER, WHEN AVAILABLE
// - CHR
@ -82,6 +97,22 @@ func main(){
// - N
// -Total sample size used in the GWAS analysis
// These are the columns of the Obesity (WHR) File:
// Filename: PublicRelease.WHRadjBMI.C.All.Add.txt.gz
//
// -1. snpname - dbSNP rsID
// -2. chr - chromosome
// -3. pos - position
// -4. markername - chr:pos
// -5. ref - reference allele (hg19 + strand)
// -6. alt - alternate allele (hg19 + strand)
// -7. beta - beta
// -8. se - standard error
// -9. pvalue - P value
// -10. n - sample size
// -11. gmaf/eur_maf - alternate allele frequency in 1000 Genome Combined/European Ancestries
// -12. exac_maf/exac_nfe_maf -alternate allele frequency in ExAC Combined/Non-Finnish European Ancestries
_, err = bufioReader.ReadString('\n')
if (err != nil) { return err }
@ -91,7 +122,7 @@ func main(){
Effect float64
}
rsidsInfoMap := make(map[int64]LocusInfo)
lociInfoMap := make(map[int64]LocusInfo)
for {
@ -102,58 +133,132 @@ func main(){
// We have reached the end of the file
break
}
// File is corrupt
return errors.New("Error reading file: " + err.Error())
}
lineElementsSlice := strings.Split(string(rsidInfoLine), "\t")
rsidString := lineElementsSlice[1]
rsidChromosomeString := lineElementsSlice[2]
rsidPositionString := lineElementsSlice[3]
rsidEffectString := lineElementsSlice[7]
//Outputs:
// -bool: Locus information is available
// -int64: Locus rsID
// -int: Locus Chromosome
// -int: Locus Position
// -float64: Locus effect
// -error
getLocusInfo := func()(bool, int64, int, int, float64, error){
rsidWithoutPrefix, prefixFound := strings.CutPrefix(rsidString, "rs")
if (prefixFound == false){
// Some of the rsIDs are not formatted in the "rs123456" format
// We skip those
//log.Println("rs prefix not found in rsID: " + rsIDString)
if (heightOrObesity == "Height"){
rsidString := lineElementsSlice[1]
locusChromosomeString := lineElementsSlice[2]
locusPositionString := lineElementsSlice[3]
locusEffectString := lineElementsSlice[7]
rsidWithoutPrefix, prefixFound := strings.CutPrefix(rsidString, "rs")
if (prefixFound == false){
// Some of the rsIDs are not formatted in the "rs123456" format
// We skip those
// log.Println("rs prefix not found in rsID: " + rsidString)
return false, 0, 0, 0, 0, nil
}
rsID, err := helpers.ConvertStringToInt64(rsidWithoutPrefix)
if (err != nil){
return false, 0, 0, 0, 0, errors.New("RSID is invalid: " + err.Error())
}
locusChromosome, err := helpers.ConvertStringToInt(locusChromosomeString)
if (err != nil){
return false, 0, 0, 0, 0, errors.New("Locus Chromosome is invalid: " + err.Error())
}
locusPosition, err := helpers.ConvertStringToInt(locusPositionString)
if (err != nil){
return false, 0, 0, 0, 0, errors.New("Locus Position is invalid: " + err.Error())
}
locusEffectRaw, err := helpers.ConvertStringToFloat64(locusEffectString)
if (err != nil) {
if (locusEffectString == ""){
// The database has at least 1 entry with no effect provided
return false, 0, 0, 0, 0, nil
}
return false, 0, 0, 0, 0, errors.New("RSID effect is invalid: " + err.Error())
}
return true, rsID, locusChromosome, locusPosition, locusEffectRaw, nil
}
rsidString := lineElementsSlice[0]
locusChromosomeString := lineElementsSlice[1]
locusPositionString := lineElementsSlice[2]
locusEffectString := lineElementsSlice[6]
if (rsidString == "-" || rsidString == ""){
return false, 0, 0, 0, 0, nil
}
rsidWithoutPrefix, prefixFound := strings.CutPrefix(rsidString, "rs")
if (prefixFound == false){
return false, 0, 0, 0, 0, errors.New("Obesity GWAS file contains invalid rsID: " + rsidString)
}
rsID, err := helpers.ConvertStringToInt64(rsidWithoutPrefix)
if (err != nil){
return false, 0, 0, 0, 0, errors.New("RSID is invalid: " + err.Error())
}
locusChromosome, err := helpers.ConvertStringToInt(locusChromosomeString)
if (err != nil){
if (locusChromosomeString == "X"){
// TODO: Add the ability to read these chromosomes
return false, 0, 0, 0, 0, nil
}
return false, 0, 0, 0, 0, errors.New("Locus Chromosome is invalid: " + err.Error())
}
locusPosition, err := helpers.ConvertStringToInt(locusPositionString)
if (err != nil){
hasSuffix := strings.HasSuffix(locusPositionString, "+08")
if (hasSuffix == true){
// This is an invalid entry in the file
return false, 0, 0, 0, 0, nil
}
return false, 0, 0, 0, 0, errors.New("Locus Position is invalid: " + err.Error())
}
locusEffectRaw, err := helpers.ConvertStringToFloat64(locusEffectString)
if (err != nil) {
return false, 0, 0, 0, 0, errors.New("RSID effect is invalid: " + err.Error())
}
return true, rsID, locusChromosome, locusPosition, locusEffectRaw, nil
}
locusInfoExists, locusRSID, locusChromosome, locusPosition, locusEffectRaw, err := getLocusInfo()
if (err != nil) { return err }
if (locusInfoExists == false){
continue
}
rsID, err := helpers.ConvertStringToInt64(rsidWithoutPrefix)
if (err != nil){
return errors.New("RSID is invalid: " + err.Error())
}
rsidChromosome, err := helpers.ConvertStringToInt(rsidChromosomeString)
if (err != nil){ return err }
rsidPosition, err := helpers.ConvertStringToInt(rsidPositionString)
if (err != nil){ return err }
rsidEffectRaw, err := helpers.ConvertStringToFloat64(rsidEffectString)
if (err != nil) {
if (rsidEffectString == ""){
// The database has at least 1 entry with no effect provided
continue
}
return err
}
// Effect can be negative, we make it positive
rsidEffect := math.Abs(rsidEffectRaw)
locusEffect := math.Abs(locusEffectRaw)
existingLocusValue, exists := rsidsInfoMap[rsID]
existingLocusValue, exists := lociInfoMap[locusRSID]
if (exists == false){
newLocusInfo := LocusInfo{
Chromosome: rsidChromosome,
Position: rsidPosition,
Effect: rsidEffect,
Chromosome: locusChromosome,
Position: locusPosition,
Effect: locusEffect,
}
rsidsInfoMap[rsID] = newLocusInfo
lociInfoMap[locusRSID] = newLocusInfo
} else {
// We see if the effect of this allele is greater
@ -165,24 +270,24 @@ func main(){
existingPosition := existingLocusValue.Position
existingEffect := existingLocusValue.Effect
if (existingChromosome != rsidChromosome){
if (existingChromosome != locusChromosome){
return errors.New("GIANT gwas contains two rsIDs with conflicting chromosomes.")
}
if (existingPosition != rsidPosition){
if (existingPosition != locusPosition){
return errors.New("GIANT gwas contains two rsIDs with conflicting positions.")
}
if (existingEffect < rsidEffect){
if (existingEffect < locusEffect){
// We update the value with the new effect
existingLocusValue.Effect = rsidEffect
rsidsInfoMap[rsID] = existingLocusValue
existingLocusValue.Effect = locusEffect
lociInfoMap[locusRSID] = existingLocusValue
}
}
}
// We find the top 10,000 rsIDs with the greatest effect
rsidsList := helpers.GetListOfMapKeys(rsidsInfoMap)
rsidsList := helpers.GetListOfMapKeys(lociInfoMap)
compareFunction := func(rsid1 int64, rsid2 int64)int{
@ -190,14 +295,14 @@ func main(){
panic("Identical rsIDs found during sort.")
}
rsid1Info, exists := rsidsInfoMap[rsid1]
rsid1Info, exists := lociInfoMap[rsid1]
if (exists == false){
panic("rsid1 is missing from rsidsInfoMap.")
panic("rsid1 is missing from lociInfoMap.")
}
rsid2Info, exists := rsidsInfoMap[rsid2]
rsid2Info, exists := lociInfoMap[rsid2]
if (exists == false){
panic("rsid2 is missing from rsidsInfoMap.")
panic("rsid2 is missing from lociInfoMap.")
}
rsid1Effect := rsid1Info.Effect
@ -225,17 +330,16 @@ func main(){
for _, rsID := range mostImpactfulLoci{
locusInfo, exists := rsidsInfoMap[rsID]
locusInfo, exists := lociInfoMap[rsID]
if (exists == false){
return errors.New("rsidsInfoMap missing rsID.")
return errors.New("lociInfoMap missing rsID.")
}
locusChromosome := locusInfo.Chromosome
locusPosition := locusInfo.Position
locusReferencesMap := map[string]string{
"Height Genome-Wide Association Study (GWAS) created by the GIANT consortium": "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files",
}
locusReferencesMap := make(map[string]string)
locusReferencesMap[heightOrObesity + " Genome-Wide Association Study (GWAS) created by the GIANT consortium"] = "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files"
newLocusMetadata := locusMetadata.LocusMetadata{
RSIDsList: []int64{rsID},
@ -251,12 +355,16 @@ func main(){
locusMetadatasToAddList = append(locusMetadatasToAddList, newLocusMetadata)
}
// We add the locus metadatas
_, newLocusMetadataFileBytes, err := modifyLocusMetadata.AddLocusMetadata(locusMetadatasToAddList)
if (err != nil) { return err }
err = localFilesystem.CreateOrOverwriteFile(newLocusMetadataFileBytes, "./", "NewLocusMetadata.gob")
if (err != nil){ return err }
// We create the rsIDs list file
buffer := new(bytes.Buffer)
gobEncoder := gob.NewEncoder(buffer)
@ -266,7 +374,9 @@ func main(){
encodedBytes := buffer.Bytes()
err = localFilesystem.CreateOrOverwriteFile(encodedBytes, "./", "GiantHeightStudyLoci.gob")
filename := "Giant" + heightOrObesity + "StudyLoci.gob"
err = localFilesystem.CreateOrOverwriteFile(encodedBytes, "./", filename)
if (err != nil){ return err }
return nil