Added the Height trait to the traits package. Migrated locus metadata from json encoding to gob encoding.

This commit is contained in:
Simon Sarasova 2024-08-05 07:11:10 +00:00
parent 62887d48b5
commit 03b8503b89
No known key found for this signature in database
GPG key ID: EEDA4103C9C36944
63 changed files with 1597 additions and 4844 deletions

View file

@ -6,6 +6,7 @@ Small and insignificant changes may not be included in this log.
## Unversioned Changes
* Added the Height trait to the traits package. Migrated locus metadata from json encoding to gob encoding. - *Simon Sarasova*
* Upgraded Fyne to version 2.5.0. - *Simon Sarasova*
* Added neural network trait prediction to genetic analyses. - *Simon Sarasova*
* Improved the Create Genetic Models utility and neural network training code. Models are now able to predict traits with some accuracy. - *Simon Sarasova*

View file

@ -9,4 +9,4 @@ Many other people have written code for modules which are imported by Seekia. Th
Name | Date Of First Commit | Number Of Commits
--- | --- | ---
Simon Sarasova | June 13, 2023 | 267
Simon Sarasova | June 13, 2023 | 268

View file

@ -285,8 +285,6 @@ func setViewPolygenicDiseaseLocusDetailsPage(window fyne.Window, diseaseName str
return
}
locusGeneName := locusMetadataObject.GeneNamesList[0]
diseaseNameLabel := widget.NewLabel("Disease Name:")
diseaseNameText := getBoldLabel(diseaseName)
diseaseNameRow := container.NewHBox(layout.NewSpacer(), diseaseNameLabel, diseaseNameText, layout.NewSpacer())
@ -317,8 +315,27 @@ func setViewPolygenicDiseaseLocusDetailsPage(window fyne.Window, diseaseName str
locusNamesText := getBoldLabel(locusNamesListString)
locusNamesRow := container.NewHBox(layout.NewSpacer(), locusNamesLabel, locusNamesText, layout.NewSpacer())
getLocusGeneNameLabelValue := func()string{

	// The label is "Unknown" when GeneInfoIsKnown is false and "None" when GeneExists is false.
	// Otherwise, the first entry of the locus's gene names list is shown.
	switch {
	case locusMetadataObject.GeneInfoIsKnown == false:
		return "Unknown"
	case locusMetadataObject.GeneExists == false:
		return "None"
	}

	return locusMetadataObject.GeneNamesList[0]
}
locusGeneNameLabelValue := getLocusGeneNameLabelValue()
geneNameLabel := widget.NewLabel("Gene Name:")
geneNameText := getBoldLabel(locusGeneName)
geneNameText := getBoldLabel(locusGeneNameLabelValue)
geneNameRow := container.NewHBox(layout.NewSpacer(), geneNameLabel, geneNameText, layout.NewSpacer())
viewReferencesButton := getWidgetCentered(widget.NewButtonWithIcon("View References", theme.ListIcon(), func(){

View file

@ -2992,25 +2992,35 @@ func setViewMateProfilePage_PolygenicDiseases(window fyne.Window, userOrOffsprin
locusValueAttributeName := "LocusValue_rs" + locusRSIDString
userLocusBasePairExists, _, userLocusBasePair, err := getAnyUserProfileAttributeFunction(locusValueAttributeName)
userLocusValueExists, _, userLocusValue, err := getAnyUserProfileAttributeFunction(locusValueAttributeName)
if (err != nil) { return nil, err }
if (userLocusBasePairExists == false){
if (userLocusValueExists == false){
continue
}
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusBasePair, ";")
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusValue, ";")
if (semicolonFound == false){
return nil, errors.New("Database corrupt: Contains profile with invalid " + locusValueAttributeName + " value: " + userLocusBasePair)
return nil, errors.New("Database corrupt: Contains profile with invalid " + locusValueAttributeName + " value: " + userLocusValue)
}
userLocusValue := locusValue.LocusValue{
userLocusIsPhasedAttributeName := "LocusIsPhased_rs" + locusRSIDString
userLocusIsPhasedExists, _, userLocusIsPhasedString, err := getAnyUserProfileAttributeFunction(userLocusIsPhasedAttributeName)
if (err != nil) { return nil, err }
if (userLocusIsPhasedExists == false){
return nil, errors.New("Database corrupt: Contains profile with locusValue but not locusIsPhased status for locus: " + locusRSIDString)
}
userLocusIsPhased, err := helpers.ConvertYesOrNoStringToBool(userLocusIsPhasedString)
if (err != nil) { return nil, err }
userLocusValueObject := locusValue.LocusValue{
Base1Value: userLocusBase1,
Base2Value: userLocusBase2,
//TODO: Share LocusIsPhased information in user profiles and retrieve it into this value
LocusIsPhased: false,
LocusIsPhased: userLocusIsPhased,
}
userDiseaseLocusValuesMap[locusRSID] = userLocusValue
userDiseaseLocusValuesMap[locusRSID] = userLocusValueObject
}
userDiseaseInfoIsKnown, userDiseaseRiskScore, userNumberOfLociTested, _, err := createPersonGeneticAnalysis.GetPersonGenomePolygenicDiseaseInfo(diseaseLociList, userDiseaseLocusValuesMap, true)
@ -3215,25 +3225,35 @@ func setViewMateProfilePage_PolygenicDiseaseLoci(window fyne.Window, diseaseName
locusValueAttributeName := "LocusValue_rs" + locusRSIDString
userLocusBasePairExists, _, userLocusBasePair, err := getAnyUserProfileAttributeFunction(locusValueAttributeName)
userLocusValueExists, _, userLocusValue, err := getAnyUserProfileAttributeFunction(locusValueAttributeName)
if (err != nil) { return nil, err }
if (userLocusBasePairExists == false){
if (userLocusValueExists == false){
continue
}
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusBasePair, ";")
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusValue, ";")
if (semicolonFound == false){
return nil, errors.New("Database corrupt: Contains profile with invalid " + locusValueAttributeName + " value: " + userLocusBasePair)
return nil, errors.New("Database corrupt: Contains profile with invalid " + locusValueAttributeName + " value: " + userLocusValue)
}
userLocusValue := locusValue.LocusValue{
userLocusIsPhasedAttributeName := "LocusIsPhased_rs" + locusRSIDString
userLocusIsPhasedExists, _, userLocusIsPhasedString, err := getAnyUserProfileAttributeFunction(userLocusIsPhasedAttributeName)
if (err != nil) { return nil, err }
if (userLocusIsPhasedExists == false){
return nil, errors.New("Database corrupt: Contains profile with locusValue but not locusIsPhased status for locus: " + locusRSIDString)
}
userLocusIsPhased, err := helpers.ConvertYesOrNoStringToBool(userLocusIsPhasedString)
if (err != nil) { return nil, err }
userLocusValueObject := locusValue.LocusValue{
Base1Value: userLocusBase1,
Base2Value: userLocusBase2,
//TODO: Share LocusIsPhased information in user profiles and retrieve it into this value
LocusIsPhased: false,
LocusIsPhased: userLocusIsPhased,
}
userDiseaseLocusValuesMap[locusRSID] = userLocusValue
userDiseaseLocusValuesMap[locusRSID] = userLocusValueObject
}
return userDiseaseLocusValuesMap, nil
@ -3687,25 +3707,35 @@ func setViewMateProfilePage_DiscreteGeneticTraits(window fyne.Window, userOrOffs
userLocusValueAttributeName := "LocusValue_rs" + rsIDString
userLocusBasePairIsKnown, _, userLocusBasePair, err := getAnyUserProfileAttributeFunction(userLocusValueAttributeName)
userLocusValueIsKnown, _, userLocusValue, err := getAnyUserProfileAttributeFunction(userLocusValueAttributeName)
if (err != nil) { return nil, err }
if (userLocusBasePairIsKnown == false){
if (userLocusValueIsKnown == false){
continue
}
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusBasePair, ";")
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusValue, ";")
if (semicolonFound == false){
return nil, errors.New("Database corrupt: Contains profile with invalid " + userLocusValueAttributeName + " value: " + userLocusBasePair)
return nil, errors.New("Database corrupt: Contains profile with invalid " + userLocusValueAttributeName + " value: " + userLocusValue)
}
userLocusValue := locusValue.LocusValue{
userLocusIsPhasedAttributeName := "LocusIsPhased_rs" + rsIDString
userLocusIsPhasedExists, _, userLocusIsPhasedString, err := getAnyUserProfileAttributeFunction(userLocusIsPhasedAttributeName)
if (err != nil) { return nil, err }
if (userLocusIsPhasedExists == false){
return nil, errors.New("Database corrupt: Contains profile with locusValue but not locusIsPhased status for locus: " + rsIDString)
}
userLocusIsPhased, err := helpers.ConvertYesOrNoStringToBool(userLocusIsPhasedString)
if (err != nil) { return nil, err }
userLocusValueObject := locusValue.LocusValue{
Base1Value: userLocusBase1,
Base2Value: userLocusBase2,
//TODO: Share LocusIsPhased information in user profiles and retrieve it into this value
LocusIsPhased: false,
LocusIsPhased: userLocusIsPhased,
}
userTraitLocusValuesMap[rsID] = userLocusValue
userTraitLocusValuesMap[rsID] = userLocusValueObject
}
if (userOrOffspring == "User"){
@ -4042,25 +4072,35 @@ func setViewMateProfilePage_DiscreteTraitRules(window fyne.Window, traitName str
userLocusValueAttributeName := "LocusValue_rs" + rsIDString
userLocusBasePairIsKnown, _, userLocusBasePair, err := getAnyUserProfileAttributeFunction(userLocusValueAttributeName)
userLocusValueIsKnown, _, userLocusValue, err := getAnyUserProfileAttributeFunction(userLocusValueAttributeName)
if (err != nil) { return false, nil, err }
if (userLocusBasePairIsKnown == false){
if (userLocusValueIsKnown == false){
continue
}
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusBasePair, ";")
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusValue, ";")
if (semicolonFound == false){
return false, nil, errors.New("Database corrupt: Contains profile with invalid " + userLocusValueAttributeName + " value: " + userLocusBasePair)
return false, nil, errors.New("Database corrupt: Contains profile with invalid " + userLocusValueAttributeName + " value: " + userLocusValue)
}
userLocusValue := locusValue.LocusValue{
userLocusIsPhasedAttributeName := "LocusIsPhased_rs" + rsIDString
userLocusIsPhasedExists, _, userLocusIsPhasedString, err := getAnyUserProfileAttributeFunction(userLocusIsPhasedAttributeName)
if (err != nil) { return false, nil, err }
if (userLocusIsPhasedExists == false){
return false, nil, errors.New("Database corrupt: Contains profile with locusValue but not locusIsPhased status for locus: " + rsIDString)
}
userLocusIsPhased, err := helpers.ConvertYesOrNoStringToBool(userLocusIsPhasedString)
if (err != nil) { return false, nil, err }
userLocusValueObject := locusValue.LocusValue{
Base1Value: userLocusBase1,
Base2Value: userLocusBase2,
//TODO: Share LocusIsPhased information in user profiles and retrieve it into this value
LocusIsPhased: false,
LocusIsPhased: userLocusIsPhased,
}
userTraitLocusValuesMap[rsID] = userLocusValue
userTraitLocusValuesMap[rsID] = userLocusValueObject
}
if (len(userTraitLocusValuesMap) == 0){
return false, nil, nil

View file

@ -397,7 +397,8 @@ func initializeApplicationVariables()error{
polygenicDiseases.InitializePolygenicDiseaseVariables()
traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil) { return err }
err = profileFormat.InitializeProfileFormatVariables()
if (err != nil) { return err }

View file

@ -963,7 +963,7 @@ func GetFakeProfile(profileType string, identityPublicKey [32]byte, identityPriv
rsidString := helpers.ConvertInt64ToString(rsID)
attributeName := "LocusValue_rs" + rsidString
locusValueAttributeName := "LocusValue_rs" + rsidString
baseA, err := helpers.GetRandomItemFromList(locusBasesList)
if (err != nil) { return nil, err }
@ -971,9 +971,17 @@ func GetFakeProfile(profileType string, identityPublicKey [32]byte, identityPriv
baseB, err := helpers.GetRandomItemFromList(locusBasesList)
if (err != nil) { return nil, err }
attributeValue := baseA + ";" + baseB
locusValueAttributeValue := baseA + ";" + baseB
profileMap[attributeName] = attributeValue
profileMap[locusValueAttributeName] = locusValueAttributeValue
locusIsPhasedAttributeName := "LocusIsPhased_rs" + rsidString
locusIsPhased := helpers.GetRandomBool()
locusIsPhasedString := helpers.ConvertBoolToYesOrNoString(locusIsPhased)
profileMap[locusIsPhasedAttributeName] = locusIsPhasedString
}
}
}

View file

@ -38,9 +38,13 @@ func TestGenerateProfiles(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
traits.InitializeTraitVariables()
err := profileFormat.InitializeProfileFormatVariables()
err := traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
err = profileFormat.InitializeProfileFormatVariables()
if (err != nil) {
t.Fatalf("Failed to initialize profile format variables: " + err.Error())
}

View file

@ -26,7 +26,11 @@ func TestCreateCoupleGeneticAnalysis_SingleGenomes(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
getPersonGenomesList := func()([]prepareRawGenomes.RawGenomeWithMetadata, error){
@ -101,7 +105,11 @@ func TestCreateCoupleGeneticAnalysis_SingleAndMultipleGenomes(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
getPersonGenomesList := func(addSecondGenome bool)([]prepareRawGenomes.RawGenomeWithMetadata, error){
@ -199,7 +207,11 @@ func TestCreateCoupleGeneticAnalysis_MultipleGenomes(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
getPersonGenomesList := func()([]prepareRawGenomes.RawGenomeWithMetadata, error){

View file

@ -1056,7 +1056,7 @@ func GetGenomeDiscreteTraitAnalysis_NeuralNetwork(traitObject traits.Trait, geno
traitName := traitObject.TraitName
neuralNetworkModelExists, traitPredictionIsPossible, predictedOutcome, predictionConfidence, quantityOfLociKnown, quantityOfPhasedLoci, err := geneticPrediction.GetNeuralNetworkTraitPredictionFromGenomeMap(traitName, genomeLocusValuesMap)
neuralNetworkModelExists, traitPredictionIsPossible, predictedOutcome, predictionConfidence, quantityOfLociKnown, quantityOfPhasedLoci, err := geneticPrediction.GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap(traitName, genomeLocusValuesMap)
if (err != nil) { return false, false, "", 0, 0, 0, err }
if (neuralNetworkModelExists == false){
return false, false, "", 0, 0, 0, nil

View file

@ -26,7 +26,11 @@ func TestCreatePersonGeneticAnalysis_SingleGenome(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
genomeIdentifier, err := helpers.GetNewRandom16ByteArray()
if (err != nil) {
@ -85,7 +89,11 @@ func TestCreatePersonGeneticAnalysis_MultipleGenomes(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
numberOfGenomesToAdd := helpers.GetRandomIntWithinRange(2, 5)

View file

@ -4,8 +4,6 @@
package createRawGenomes
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/internal/genetics/readRawGenomes"
import "seekia/internal/helpers"
import "seekia/internal/unixTime"
@ -25,11 +23,6 @@ import "strings"
// -error
func CreateFakeRawGenome_23andMe()(string, int64, int64, map[int64]readRawGenomes.RawGenomeLocusValue, error){
err := locusMetadata.InitializeLocusMetadataVariables()
if (err != nil){
return "", 0, 0, nil, errors.New("InitializeLocusMetadataVariables failed: " + err.Error())
}
yearUnix := unixTime.GetYearUnix()
maximumTime := time.Now().Unix()
@ -99,7 +92,7 @@ func CreateFakeRawGenome_23andMe()(string, int64, int64, map[int64]readRawGenome
# rsid chromosome position genotype
`
_, err = fileContentsBuilder.WriteString(fileHeader)
_, err := fileContentsBuilder.WriteString(fileHeader)
if (err != nil){
return "", 0, 0, nil, errors.New("Failed to WriteString to string builder: " + err.Error())
}
@ -215,11 +208,6 @@ func CreateFakeRawGenome_23andMe()(string, int64, int64, map[int64]readRawGenome
// -error
func CreateFakeRawGenome_AncestryDNA()(string, int64, int64, map[int64]readRawGenomes.RawGenomeLocusValue, error){
err := locusMetadata.InitializeLocusMetadataVariables()
if (err != nil){
return "", 0, 0, nil, errors.New("InitializeLocusMetadataVariables failed: " + err.Error())
}
yearUnix := unixTime.GetYearUnix()
maximumTime := time.Now().Unix()
@ -282,7 +270,7 @@ func CreateFakeRawGenome_AncestryDNA()(string, int64, int64, map[int64]readRawGe
rsid chromosome position allele1 allele2
`
_, err = fileContentsBuilder.WriteString(fileHeader)
_, err := fileContentsBuilder.WriteString(fileHeader)
if (err != nil){
return "", 0, 0, nil, errors.New("Failed to WriteString to string builder: " + err.Error())
}

View file

@ -212,11 +212,11 @@ func DecodeBytesToNeuralNetworkObject(inputNeuralNetwork []byte)(NeuralNetwork,
return newNeuralNetworkObject, nil
}
// This map is used to store information about how accurate genetic prediction models are
// Map Structure: Trait Outcome Info -> Trait Prediction Accuracy Info
type TraitPredictionAccuracyInfoMap map[TraitOutcomeInfo]TraitPredictionAccuracyInfo
// This map is used to store information about how accurate genetic prediction models are for discrete traits
// Map Structure: Discrete Trait Outcome Info -> Discrete Trait Prediction Accuracy Info
type DiscreteTraitPredictionAccuracyInfoMap map[DiscreteTraitOutcomeInfo]DiscreteTraitPredictionAccuracyInfo
type TraitOutcomeInfo struct{
type DiscreteTraitOutcomeInfo struct{
// This is the outcome which was found
// Example: "Blue"
@ -229,7 +229,7 @@ type TraitOutcomeInfo struct{
PercentageOfPhasedLoci int
}
type TraitPredictionAccuracyInfo struct{
type DiscreteTraitPredictionAccuracyInfo struct{
// This contains the quantity of examples for the outcome with the specified percentageOfLociTested and percentageOfPhasedLoci
QuantityOfExamples int
@ -251,7 +251,7 @@ type TraitPredictionAccuracyInfo struct{
ProbabilityOfCorrectOutcomePrediction int
}
func EncodeTraitPredictionAccuracyInfoMapToBytes(inputMap TraitPredictionAccuracyInfoMap)([]byte, error){
func EncodeDiscreteTraitPredictionAccuracyInfoMapToBytes(inputMap DiscreteTraitPredictionAccuracyInfoMap)([]byte, error){
buffer := new(bytes.Buffer)
@ -265,22 +265,22 @@ func EncodeTraitPredictionAccuracyInfoMapToBytes(inputMap TraitPredictionAccurac
return inputMapBytes, nil
}
func DecodeBytesToTraitPredictionAccuracyInfoMap(inputBytes []byte)(TraitPredictionAccuracyInfoMap, error){
func DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap(inputBytes []byte)(DiscreteTraitPredictionAccuracyInfoMap, error){
if (inputBytes == nil){
return nil, errors.New("DecodeBytesToTraitPredictionAccuracyInfoMap called with nil inputBytes.")
return nil, errors.New("DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap called with nil inputBytes.")
}
buffer := bytes.NewBuffer(inputBytes)
decoder := gob.NewDecoder(buffer)
var newTraitPredictionAccuracyInfoMap TraitPredictionAccuracyInfoMap
var newDiscreteTraitPredictionAccuracyInfoMap DiscreteTraitPredictionAccuracyInfoMap
err := decoder.Decode(&newTraitPredictionAccuracyInfoMap)
err := decoder.Decode(&newDiscreteTraitPredictionAccuracyInfoMap)
if (err != nil){ return nil, err }
return newTraitPredictionAccuracyInfoMap, nil
return newDiscreteTraitPredictionAccuracyInfoMap, nil
}
//Outputs:
@ -291,11 +291,16 @@ func DecodeBytesToTraitPredictionAccuracyInfoMap(inputBytes []byte)(TraitPredict
// -int: Quantity of loci known
// -int: Quantity of phased loci
// -error
func GetNeuralNetworkTraitPredictionFromGenomeMap(traitName string, genomeMap map[int64]locusValue.LocusValue)(bool, bool, string, int, int, int, error){
func GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap(traitName string, genomeMap map[int64]locusValue.LocusValue)(bool, bool, string, int, int, int, error){
traitObject, err := traits.GetTraitObject(traitName)
if (err != nil) { return false, false, "", 0, 0, 0, err }
traitIsDiscreteOrNumeric := traitObject.DiscreteOrNumeric
if (traitIsDiscreteOrNumeric != "Discrete"){
return false, false, "", 0, 0, 0, errors.New("GetNeuralNetworkDiscreteTraitPredictionFromGenomeMap called with non-discrete trait: " + traitName)
}
// This is a list of rsIDs which influence this trait
traitRSIDsList := traitObject.LociList
@ -370,16 +375,16 @@ func GetNeuralNetworkTraitPredictionFromGenomeMap(traitName string, genomeMap ma
neuralNetworkObject, err := DecodeBytesToNeuralNetworkObject(predictionModelBytes)
if (err != nil) { return false, false, "", 0, 0, 0, err }
outputLayer, err := GetNeuralNetworkRawPrediction(&neuralNetworkObject, neuralNetworkInput)
outputLayer, err := GetNeuralNetworkRawPrediction(&neuralNetworkObject, false, neuralNetworkInput)
if (err != nil) { return false, false, "", 0, 0, 0, err }
predictedOutcomeName, err := GetOutcomeNameFromOutputLayer(traitName, false, outputLayer)
if (err != nil) { return false, false, "", 0, 0, 0, err }
modelTraitAccuracyInfoFile, err := geneticPredictionModels.GetPredictionModelTraitAccuracyInfoBytes(traitName)
modelTraitAccuracyInfoFile, err := geneticPredictionModels.GetPredictionModelDiscreteTraitAccuracyInfoBytes(traitName)
if (err != nil) { return false, false, "", 0, 0, 0, err }
modelTraitAccuracyInfoMap, err := DecodeBytesToTraitPredictionAccuracyInfoMap(modelTraitAccuracyInfoFile)
modelTraitAccuracyInfoMap, err := DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap(modelTraitAccuracyInfoFile)
if (err != nil) { return false, false, "", 0, 0, 0, err }
// We find the model trait accuracy info object that is the most similar to our predicted outcome
@ -463,7 +468,7 @@ func GetNeuralNetworkTraitPredictionFromGenomeMap(traitName string, genomeMap ma
// -int: Number of loci values that are known and phased
// -int: Number of loci
// -error
func GetLociInfoFromInputLayer(inputLayer []float32)(int, int, int, error){
func GetLociInfoFromNetworkInputLayer(inputLayer []float32)(int, int, int, error){
// Each input layer has 3 neurons for each locus
// Each rsID (locus) is represented by 3 neurons: LocusExists/LocusIsPhased, Allele1 Value, Allele2 Value
@ -476,7 +481,7 @@ func GetLociInfoFromInputLayer(inputLayer []float32)(int, int, int, error){
inputLayerLength := len(inputLayer)
if (inputLayerLength%3 != 0){
return 0, 0, 0, errors.New("GetInputInfoFromInputLayer called with invalid length input layer: Not evenly divisible by 4.")
return 0, 0, 0, errors.New("GetLociInfoFromNetworkInputLayer called with invalid length input layer: Not evenly divisible by 4.")
}
numberOfLoci := len(inputLayer)/3
@ -505,13 +510,14 @@ func GetLociInfoFromInputLayer(inputLayer []float32)(int, int, int, error){
}
if (numberOfLociValuesThatAreKnown == 0){
return 0, 0, 0, errors.New("GetInputInfoFromInputLayer called with input layer with no known loci values.")
return 0, 0, 0, errors.New("GetLociInfoFromNetworkInputLayer called with input layer with no known loci values.")
}
return numberOfLociValuesThatAreKnown, numberOfLociValuesThatAreKnownAndPhased, numberOfLoci, nil
}
// This function returns which outcome is being described from a neural network's final output layer
// This is only used for discrete traits
// Outputs:
// -string: Output Name (Example: "Blue")
// -error
@ -1059,6 +1065,9 @@ func (inputNetwork *NeuralNetwork)getLearnables()gorgonia.Nodes{
// The function is passed a batch of TrainingData examples to train on
// Inputs:
// -string: Trait Name
// -bool: Trait is Numeric
// -An example of a numeric trait is Height
// -An example of a discrete trait is Eye Color, which has discrete outcomes (colors)
// -*NeuralNetwork
// -func()(bool, bool, TrainingData, error): Function to get the next training data.
// -Outputs:
@ -1069,7 +1078,7 @@ func (inputNetwork *NeuralNetwork)getLearnables()gorgonia.Nodes{
// Outputs:
// -bool: Process completed (was not stopped mid-way)
// -error
func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, getNextTrainingData func()(bool, bool, TrainingData, error))(bool, error){
func TrainNeuralNetwork(traitName string, traitIsNumeric bool, neuralNetworkObject *NeuralNetwork, getNextTrainingData func()(bool, bool, TrainingData, error))(bool, error){
layer1NeuronCount, _, _, layer4NeuronCount, err := getNeuralNetworkLayerSizes(traitName)
if (err != nil) { return false, err }
@ -1091,7 +1100,7 @@ func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, ge
gorgonia.WithShape(1, layer4NeuronCount),
)
err = neuralNetworkObject.buildNeuralNetwork(trainingDataInputNode)
err = neuralNetworkObject.buildNeuralNetwork(trainingDataInputNode, traitIsNumeric)
if (err != nil) { return false, err }
// This computes the loss (how accurate was our prediction)
@ -1187,7 +1196,7 @@ func TrainNeuralNetwork(traitName string, neuralNetworkObject *NeuralNetwork, ge
// Outputs:
// -[]float32: Output neurons
// -error
func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer []float32)([]float32, error){
func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, traitIsNumeric bool, inputLayer []float32)([]float32, error){
neuralNetworkGraph := inputNeuralNetwork.graph
@ -1211,7 +1220,7 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer
if (err != nil) { return nil, err }
err = inputNeuralNetwork.buildNeuralNetwork(inputNode)
err = inputNeuralNetwork.buildNeuralNetwork(inputNode, traitIsNumeric)
if (err != nil){ return nil, err }
// Now we create a virtual machine to compute the prediction
@ -1235,7 +1244,7 @@ func GetNeuralNetworkRawPrediction(inputNeuralNetwork *NeuralNetwork, inputLayer
// This function will take a neural network and input layer and build the network to be able to compute a prediction
// We need to run a virtual machine after calling this function in order for the prediction to be generated
func (inputNetwork *NeuralNetwork)buildNeuralNetwork(inputLayer *gorgonia.Node)error{
func (inputNetwork *NeuralNetwork)buildNeuralNetwork(inputLayer *gorgonia.Node, traitIsNumeric bool)error{
// We copy node pointer (says to do this in a resource i'm reading)
@ -1274,15 +1283,29 @@ func (inputNetwork *NeuralNetwork)buildNeuralNetwork(inputLayer *gorgonia.Node)e
return errors.New("Layer 3 multiplication failed: " + err.Error())
}
// We SoftMax the output to get the prediction
if (traitIsNumeric == false){
prediction, err := gorgonia.SoftMax(layer3Product)
if (err != nil) {
return errors.New("SoftMax failed: " + err.Error())
// We SoftMax the output to get the prediction
prediction, err := gorgonia.SoftMax(layer3Product)
if (err != nil) {
return errors.New("SoftMax failed: " + err.Error())
}
inputNetwork.prediction = prediction
} else {
// We Sigmoid the output to get the prediction
prediction, err := gorgonia.Sigmoid(layer3Product)
if (err != nil) {
return errors.New("Sigmoid failed: " + err.Error())
}
inputNetwork.prediction = prediction
}
inputNetwork.prediction = prediction
return nil
}

View file

@ -57,7 +57,7 @@ type RawGenomeLocusValue struct{
// -bool: IsPhased (allele order corresponds to haplotype)
// -map[int64]RawGenomeLocusValue: RSID -> Locus allele value(s)
// -error (file not readable)
func ReadRawGenomeFile(fileReader io.Reader) (string, int, int64, int64, bool, map[int64]RawGenomeLocusValue, error) {
func ReadRawGenomeFile(fileReader io.Reader)(string, int, int64, int64, bool, map[int64]RawGenomeLocusValue, error) {
validBasesList := []string{"C", "A", "T", "G", "I", "D"}
@ -543,4 +543,131 @@ func ReadRawGenomeFile(fileReader io.Reader) (string, int, int64, int64, bool, m
return "", 0, 0, 0, false, nil, errors.New("Cannot read genome file: File format not known.")
}
// A LocusLocation is the Chromosome and Position of a locus
// ReadRawGenomeFileLocusLocations creates these from the rows of a 23andMe genome file
type LocusLocation struct{
// The chromosome column of the row, converted to an int
// Rows whose chromosome cannot be converted to an int (probably "MT" or "X") do not get a LocusLocation
Chromosome int
// The position column of the row, converted to an int
Position int
}
// This function reads locus locations from 23andMe genome files
// A locus location is the Chromosome and Position of the locus
// Only rows with an rs-prefixed identifier and a chromosome which converts to an int are added to the map
// The final row is read even if the file does not end with a newline
//Outputs:
//	-bool: Able to read file (false if the file is not a 23andMe file or is malformed)
//	-map[int64]LocusLocation: Map of rsID -> Locus location object
//	-error: Reading from fileReader failed for a reason other than reaching the end of the file
func ReadRawGenomeFileLocusLocations(fileReader io.Reader)(bool, map[int64]LocusLocation, error){

	fileBufioReader := bufio.NewReader(fileReader)

	firstLine, err := fileBufioReader.ReadString('\n')
	if (err != nil){
		if (errors.Is(err, io.EOF) == false){
			// The reader itself failed. This is different from the file being too short.
			return false, nil, errors.New("Failed to read genome file: " + err.Error())
		}
		// File does not have another line
		// Malformed 23andMe genome file: Too short.
		return false, nil, nil
	}

	fileIs23andMe := strings.HasPrefix(firstLine, "# This data file generated by 23andMe at:")
	if (fileIs23andMe == false){
		// We can only read 23andMe files
		return false, nil, nil
	}

	// Now we advance bufio reader to the snp rows

	for {
		fileLineString, err := fileBufioReader.ReadString('\n')
		if (err != nil){
			if (errors.Is(err, io.EOF) == false){
				return false, nil, errors.New("Failed to read genome file: " + err.Error())
			}
			// File does not have another line
			// Malformed 23andMe genome file: Too short.
			return false, nil, nil
		}

		// All SNP rows come after this line:
		// "# rsid	chromosome	position	genotype"
		lineReached := strings.HasPrefix(fileLineString, "# rsid")
		if (lineReached == true){
			break
		}
	}

	// Map structure: Locus rsID -> LocusLocation
	lociLocationsMap := make(map[int64]LocusLocation)

	for {
		fileLineString, err := fileBufioReader.ReadString('\n')
		if (err != nil && errors.Is(err, io.EOF) == false){
			// We must not return a partially filled map as if the whole file had been read
			return false, nil, errors.New("Failed to read genome file: " + err.Error())
		}

		// ReadString returns the data read before the error along with io.EOF
		// If the file does not end with a newline, fileLineString still holds the final row
		reachedEndOfFile := (err != nil)

		if (fileLineString == "" || fileLineString == "\n"){
			// Either there is nothing left to read, or this is the final (empty) line
			break
		}

		fileLineWithoutNewline := strings.TrimSuffix(fileLineString, "\n")

		rowIsValid, locationIsKnown, locusRSID, locusLocationObject := readLocusLocationFrom23andMeRow(fileLineWithoutNewline)
		if (rowIsValid == false){
			// Malformed 23andMe genome data: Invalid SNP row
			return false, nil, nil
		}
		if (locationIsKnown == true){
			lociLocationsMap[locusRSID] = locusLocationObject
		}

		if (reachedEndOfFile == true){
			break
		}
	}

	return true, lociLocationsMap, nil
}

// This function parses a single SNP row of a 23andMe genome file (without its trailing newline)
// Rows look like this
// "rs4477212	1	82154	GG"
// "rs571313759	1	1181945	--" (-- means no entry)
// "i3001920	MT	16470	G" (one base is possible)
//Outputs:
//	-bool: Row is well-formed (has 4 tab-separated columns and, for rows with a usable rsID and chromosome, a valid position)
//	-bool: Locus location is known (identifier is an rsID and chromosome converts to an int)
//	-int64: Locus rsID
//	-LocusLocation: Locus location object
func readLocusLocationFrom23andMeRow(rowString string)(bool, bool, int64, LocusLocation){

	rowSlice := strings.Split(rowString, "\t")
	if (len(rowSlice) != 4){
		return false, false, 0, LocusLocation{}
	}

	locusIdentifierString := rowSlice[0]
	locusChromosomeString := rowSlice[1]
	locusPositionString := rowSlice[2]

	stringWithoutPrefix, prefixExists := strings.CutPrefix(locusIdentifierString, "rs")
	if (prefixExists == false){
		// RSID is unknown.
		// It is probably a custom identifier (Example: i713426)
		return true, false, 0, LocusLocation{}
	}

	locusRSID, err := helpers.ConvertStringToInt64(stringWithoutPrefix)
	if (err != nil){
		// The identifier starts with "rs" but is not followed by a valid number. We skip the row.
		return true, false, 0, LocusLocation{}
	}

	locusChromosome, err := helpers.ConvertStringToInt(locusChromosomeString)
	if (err != nil){
		// It is probably "MT" or "X" chromosome
		return true, false, 0, LocusLocation{}
	}

	locusPosition, err := helpers.ConvertStringToInt(locusPositionString)
	if (err != nil){
		// 23andMe file is malformed: Contains invalid locusPosition.
		return false, false, 0, LocusLocation{}
	}

	locusLocationObject := LocusLocation{
		Chromosome: locusChromosome,
		Position: locusPosition,
	}

	return true, true, locusRSID, locusLocationObject
}

View file

@ -3,6 +3,8 @@ package readRawGenomes_test
import "seekia/internal/genetics/readRawGenomes"
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/internal/genetics/createRawGenomes"
import "seekia/internal/helpers"
@ -15,6 +17,11 @@ import "strings"
func TestAncestryDNAFileReading(t *testing.T){
err := locusMetadata.InitializeLocusMetadataVariables()
if (err != nil){
t.Fatalf("InitializeLocusMetadataVariables failed: " + err.Error())
}
fileString, expectedFileTimeUnix, numberOfAddedLoci, fileRSIDsMap, err := createRawGenomes.CreateFakeRawGenome_AncestryDNA()
if (err != nil){
t.Fatalf("Failed to create fake AncestryDNA genome: " + err.Error())
@ -65,6 +72,11 @@ func TestAncestryDNAFileReading(t *testing.T){
func Test23andMeFileReading(t *testing.T){
err := locusMetadata.InitializeLocusMetadataVariables()
if (err != nil){
t.Fatalf("InitializeLocusMetadataVariables failed: " + err.Error())
}
newRawGenome, fileCreationTime, fileNumberOfLoci, fileRSIDsMap, err := createRawGenomes.CreateFakeRawGenome_23andMe()
if (err != nil){
t.Fatalf("Failed to create fake 23andMe Genome: " + err.Error())

View file

@ -16,7 +16,11 @@ func TestPersonSampleAnalyses(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
traits.InitializeTraitVariables()
err := traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
person1AnalysisObject, err := sampleAnalyses.GetSamplePerson1Analysis()
if (err != nil) {
@ -44,7 +48,11 @@ func TestCoupleSampleAnalyses(t *testing.T){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
traits.InitializeTraitVariables()
err := traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
coupleAnalysisObject, err := sampleAnalyses.GetSampleCoupleAnalysis()
if (err != nil){

View file

@ -1404,23 +1404,23 @@ func SortIdentityHashListToUnicodeOrder(inputList [][16]byte)error{
identityHashStringsMap[identityHash] = identityHashString
}
compareFunction := func(identityHashA [16]byte, identityHashB [16]byte)int{
compareFunction := func(identityHash1 [16]byte, identityHash2 [16]byte)int{
if (identityHashA == identityHashB){
if (identityHash1 == identityHash2){
return 0
}
identityHashAString, exists := identityHashStringsMap[identityHashA]
identityHash1String, exists := identityHashStringsMap[identityHash1]
if (exists == false){
panic("identityHashA is missing from identityHashStringsMap.")
panic("identityHash1 is missing from identityHashStringsMap.")
}
identityHashBString, exists := identityHashStringsMap[identityHashB]
identityHash2String, exists := identityHashStringsMap[identityHash2]
if (exists == false){
panic("identityHashB is missing from identityHashStringsMap.")
panic("identityHash2 is missing from identityHashStringsMap.")
}
if (identityHashAString < identityHashBString){
if (identityHash1String < identityHash2String){
return -1
}

View file

@ -1382,11 +1382,15 @@ func TestCreateAndReadRequest_BroadcastContent(t *testing.T){
// We initialize these variables so we can create fake profiles
traits.InitializeTraitVariables()
err := traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err := profileFormat.InitializeProfileFormatVariables()
err = profileFormat.InitializeProfileFormatVariables()
if (err != nil) {
t.Fatalf("Failed to initialize profile format variables: " + err.Error())
}

View file

@ -326,15 +326,19 @@ func TestCreateAndReadResponse_GetProfilesInfo(t *testing.T){
func TestCreateAndReadResponse_GetProfiles(t *testing.T){
err := profileFormat.InitializeProfileFormatVariables()
err := traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = profileFormat.InitializeProfileFormatVariables()
if (err != nil) {
t.Fatalf("Failed to initialize profile format variables: " + err.Error())
}
traits.InitializeTraitVariables()
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
hostPublicIdentityKey, hostPrivateIdentityKey, err := identity.GetNewRandomPublicPrivateIdentityKeys()
if (err != nil) {
t.Fatalf("Failed to create random identity keys: " + err.Error())

View file

@ -946,6 +946,18 @@ func GetProfileAttributeDisplayInfo(attributeName string)(string, bool, func(str
return titleTranslated, false, passValueFunction, "", noResponseTranslated, nil
}
// Handle the per-locus "LocusIsPhased_rs<number>" attributes.
// These attribute names are generated per rsID, so they are matched by prefix rather than by exact name.
hasLocusIsPhasedPrefix := strings.HasPrefix(attributeName, "LocusIsPhased_rs")
if (hasLocusIsPhasedPrefix == true){
// Only "LocusIsPhased_" is trimmed, so the "rs" prefix is kept and the result reads like "rs12345"
locusRSID := strings.TrimPrefix(attributeName, "LocusIsPhased_")
locusTranslated := translation.TranslateTextFromEnglishToMyLanguage("Locus")
isPhasedTranslated := translation.TranslateTextFromEnglishToMyLanguage("Is Phased")
// Example title (in English): "Locus rs12345 Is Phased"
titleTranslated := locusTranslated + " " + locusRSID + " " + isPhasedTranslated
return titleTranslated, false, translateValueFunction, "", noResponseTranslated, nil
}
return "", false, nil, "", "", errors.New("GetProfileAttributeDisplayInfo called with unknown attributeName: " + attributeName)
}

View file

@ -1,7 +1,11 @@
package attributeDisplay_test
import "seekia/internal/globalSettings"
import "seekia/internal/profiles/attributeDisplay"
import "seekia/resources/geneticReferences/polygenicDiseases"
import "seekia/resources/geneticReferences/traits"
import "seekia/internal/globalSettings"
import "seekia/internal/profiles/calculatedAttributes"
import "seekia/internal/profiles/profileFormat"
@ -15,6 +19,13 @@ func TestGetAttributeDisplayInfo(t *testing.T){
t.Fatalf("InitializeGlobalSettingsDatastore failed: " + err.Error())
}
polygenicDiseases.InitializePolygenicDiseaseVariables()
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
err = profileFormat.InitializeProfileFormatVariables()
if (err != nil) {
t.Fatalf("InitializeProfileFormatVariables failed: " + err.Error())

View file

@ -692,25 +692,35 @@ func GetAnyProfileAttributeIncludingCalculated(attributeName string, getProfileA
locusValueAttributeName := "LocusValue_rs" + locusRSIDString
userLocusBasePairExists, _, userLocusBasePair, err := getProfileAttributesFunction(locusValueAttributeName)
userLocusValueExists, _, userLocusValue, err := getProfileAttributesFunction(locusValueAttributeName)
if (err != nil) { return false, 0, "", err }
if (userLocusBasePairExists == false){
if (userLocusValueExists == false){
continue
}
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusBasePair, ";")
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusValue, ";")
if (semicolonFound == false){
return false, 0, "", errors.New("Database corrupt: Contains profile with invalid " + locusValueAttributeName + " value: " + userLocusBasePair)
return false, 0, "", errors.New("Database corrupt: Contains profile with invalid " + locusValueAttributeName + " value: " + userLocusValue)
}
userLocusValue := locusValue.LocusValue{
userLocusIsPhasedAttributeName := "LocusIsPhased_rs" + locusRSIDString
userLocusIsPhasedExists, _, userLocusIsPhasedString, err := getProfileAttributesFunction(userLocusIsPhasedAttributeName)
if (err != nil) { return false, 0, "", err }
if (userLocusIsPhasedExists == false){
return false, 0, "", errors.New("Database corrupt: Contains profile with locusValue but not locusIsPhased status for locus: " + locusRSIDString)
}
userLocusIsPhased, err := helpers.ConvertYesOrNoStringToBool(userLocusIsPhasedString)
if (err != nil) { return false, 0, "", err }
userLocusValueObject := locusValue.LocusValue{
Base1Value: userLocusBase1,
Base2Value: userLocusBase2,
//TODO: Share LocusIsPhased information in user profiles and retrieve it into this value
LocusIsPhased: false,
LocusIsPhased: userLocusIsPhased,
}
userDiseaseLocusValuesMap[locusRSID] = userLocusValue
userDiseaseLocusValuesMap[locusRSID] = userLocusValueObject
}
anyLocusTested, userDiseaseRiskScore, _, _, err := createPersonGeneticAnalysis.GetPersonGenomePolygenicDiseaseInfo(diseaseLociList, userDiseaseLocusValuesMap, true)
@ -801,25 +811,35 @@ func GetAnyProfileAttributeIncludingCalculated(attributeName string, getProfileA
locusValueAttributeName := "LocusValue_rs" + locusRSIDString
userLocusBasePairExists, _, userLocusBasePair, err := getProfileAttributesFunction(locusValueAttributeName)
userLocusValueExists, _, userLocusValue, err := getProfileAttributesFunction(locusValueAttributeName)
if (err != nil) { return false, 0, "", err }
if (userLocusBasePairExists == false){
if (userLocusValueExists == false){
continue
}
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusBasePair, ";")
userLocusBase1, userLocusBase2, semicolonFound := strings.Cut(userLocusValue, ";")
if (semicolonFound == false){
return false, 0, "", errors.New("GetAnyProfileAttributeIncludingCalculated called with profile containing invalid " + locusValueAttributeName + ": " + userLocusBasePair)
return false, 0, "", errors.New("GetAnyProfileAttributeIncludingCalculated called with profile containing invalid " + locusValueAttributeName + ": " + userLocusValue)
}
newLocusValue := locusValue.LocusValue{
userLocusIsPhasedAttributeName := "LocusIsPhased_rs" + locusRSIDString
userLocusIsPhasedExists, _, userLocusIsPhasedString, err := getProfileAttributesFunction(userLocusIsPhasedAttributeName)
if (err != nil) { return false, 0, "", err }
if (userLocusIsPhasedExists == false){
return false, 0, "", errors.New("Database corrupt: Contains profile with locusValue but not locusIsPhased status for locus: " + locusRSIDString)
}
userLocusIsPhased, err := helpers.ConvertYesOrNoStringToBool(userLocusIsPhasedString)
if (err != nil) { return false, 0, "", err }
newLocusValueObject := locusValue.LocusValue{
Base1Value: userLocusBase1,
Base2Value: userLocusBase2,
//TODO: Share locusIsPhased information in user profiles are put it here
LocusIsPhased: false,
LocusIsPhased: userLocusIsPhased,
}
userDiseaseLocusValuesMap[locusRSID] = newLocusValue
userDiseaseLocusValuesMap[locusRSID] = newLocusValueObject
}
anyLocusValuesTested, offspringAverageRiskScore, _, err := createCoupleGeneticAnalysis.GetOffspringPolygenicDiseaseInfo_Fast(diseaseLociList, myGenomeLocusValuesMap, userDiseaseLocusValuesMap)

View file

@ -10,6 +10,8 @@ package profileFormat
// The order was tarnished after I added and removed some attributes
import "seekia/resources/currencies"
import "seekia/resources/geneticReferences/traits"
import "seekia/resources/geneticReferences/polygenicDiseases"
import "seekia/resources/imageFiles"
import "seekia/resources/worldLanguages"
import "seekia/resources/worldLocations"
@ -93,7 +95,8 @@ type AttributeObject struct{
// This must be run once upon application startup
func InitializeProfileFormatVariables()error{
initializeProfileAttributeObjectsList()
err := initializeProfileAttributeObjectsList()
if (err != nil) { return err }
profileAttributeObjectsList, err := GetProfileAttributeObjectsList()
if (err != nil) { return err }
@ -219,7 +222,7 @@ func GetProfileAttributeObjectsList()([]AttributeObject, error){
return profileAttributeObjectsList, nil
}
func initializeProfileAttributeObjectsList(){
func initializeProfileAttributeObjectsList()error{
// Below are some standard getAttribute functions
@ -2231,6 +2234,54 @@ func initializeProfileAttributeObjectsList(){
addMonogenicDiseaseNumberOfVariantsTestedAttribute(61, "MonogenicDisease_Sickle_Cell_Anemia_NumberOfVariantsTested")
addMonogenicDiseaseVariantProbabilityAttribute(62, "MonogenicDisease_Sickle_Cell_Anemia_ProbabilityOfPassingAVariant")
// TODO: Change attributeIdentifiers so:
// -Polygenic diseases are allotted the range: 1000 - 1999
// -Monogenic diseases are allotted the range: 2000 - 9,999
// -rsIDs are allotted the range: 10,000 - 3,000,000 (profiles will probably never share more than 500,000 loci)
// We build the shareable locus attributes from the trait and polygenic disease objects lists.
// This approach is temporary.
// Once we have profile versions on a testnet/mainnet, we have to keep the loci static for each profile version.
// For now, profile encodings will change whenever we add/remove locus metadata.
// NOTE(review): This appears to require that the traits and polygenicDiseases packages have
// already been initialized before this function runs - confirm against callers.
// This map is used as a set. It collects every rsID referenced by a trait or a polygenic disease.
// Using a map deduplicates any rsID that is referenced more than once.
shareableRSIDsMap := make(map[int64]struct{})
traitObjectsList, err := traits.GetTraitObjectsList()
if (err != nil){ return err }
// Each trait object's LociList is ranged over directly as rsIDs
for _, traitObject := range traitObjectsList{
traitLociList := traitObject.LociList
for _, rsID := range traitLociList{
shareableRSIDsMap[rsID] = struct{}{}
}
}
polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList()
if (err != nil) { return err }
// Each disease object's LociList holds locus objects, so we read the rsID from each object's LocusRSID field
for _, diseaseObject := range polygenicDiseaseObjectsList{
diseaseLociList := diseaseObject.LociList
for _, locusObject := range diseaseLociList{
locusRSID := locusObject.LocusRSID
shareableRSIDsMap[locusRSID] = struct{}{}
}
}
shareableRSIDsList := helpers.GetListOfMapKeys(shareableRSIDsMap)
// We sort rsIDs so they are always in the same order.
// Go does not guarantee map iteration order, so the keys list (presumably built by iterating the map) needs a stable order.
slices.Sort(shareableRSIDsList)
validBasesList := []string{"C", "A", "T", "G", "I", "D"}
checkValueFunction_GenomeBasePair := func(profileVersion int, profileType string, input string)(bool, bool, error){
@ -2261,14 +2312,23 @@ func initializeProfileAttributeObjectsList(){
return true, true, nil
}
addLocusValueAttributeObject := func(attributeIdentifier int, attributeName string){
addLocusValueAttributeObject := func(attributeIdentifier int, attributeName string, mandatoryAttributeName string){
getMandatoryAttributeFunction := func(profileVersion int)([]string, error){
if (profileVersion != 1){
return nil, errors.New("Trying to retrieve mandatory attributes for unknown profile version.")
}
mandatoryAttributesList := []string{mandatoryAttributeName}
return mandatoryAttributesList, nil
}
attributeObject_LocusValueBasePair := AttributeObject{
ProfileVersions: []int{1},
AttributeIdentifier: attributeIdentifier,
AttributeName: attributeName,
GetIsRequired: getIsRequired_No,
GetMandatoryAttributes: getMandatoryAttributes_None,
GetMandatoryAttributes: getMandatoryAttributeFunction,
GetProfileTypes: getProfileTypes_Mate,
GetIsCanonical: getIsCanonical_Always,
CheckValueFunction: checkValueFunction_GenomeBasePair,
@ -2277,267 +2337,52 @@ func initializeProfileAttributeObjectsList(){
attributeObjectsList = append(attributeObjectsList, attributeObject_LocusValueBasePair)
}
// TODO: Add LocusIsPhased to each rsID
// Change attributeIdentifiers so:
// -Polygenic diseases are allotted the range: 1000 - 1999
// -Monogenic diseases are allotted the range: 2000 - 9,999
// -rsIDs are allotted the range: 10,000 - 3,000,000 (profiles will probably never share more than 500,000 loci)
addLocusIsPhasedAttributeObject := func(attributeIdentifier int, attributeName string, mandatoryAttributeName string){
addLocusValueAttributeObject(500, "LocusValue_rs16942")
addLocusValueAttributeObject(501, "LocusValue_rs1045485")
addLocusValueAttributeObject(502, "LocusValue_rs34330")
addLocusValueAttributeObject(503, "LocusValue_rs144848")
addLocusValueAttributeObject(504, "LocusValue_rs766173")
addLocusValueAttributeObject(505, "LocusValue_rs1799950")
addLocusValueAttributeObject(506, "LocusValue_rs4986850")
addLocusValueAttributeObject(507, "LocusValue_rs2227945")
addLocusValueAttributeObject(508, "LocusValue_rs1799966")
addLocusValueAttributeObject(509, "LocusValue_rs4987117")
addLocusValueAttributeObject(510, "LocusValue_rs1799954")
addLocusValueAttributeObject(511, "LocusValue_rs11571746")
addLocusValueAttributeObject(512, "LocusValue_rs4987047")
addLocusValueAttributeObject(513, "LocusValue_rs11571833")
addLocusValueAttributeObject(514, "LocusValue_rs1801426")
addLocusValueAttributeObject(515, "LocusValue_rs3218707")
addLocusValueAttributeObject(516, "LocusValue_rs4987945")
addLocusValueAttributeObject(517, "LocusValue_rs4986761")
addLocusValueAttributeObject(518, "LocusValue_rs3218695")
addLocusValueAttributeObject(519, "LocusValue_rs1800056")
addLocusValueAttributeObject(520, "LocusValue_rs1800057")
addLocusValueAttributeObject(521, "LocusValue_rs3092856")
addLocusValueAttributeObject(522, "LocusValue_rs1800058")
addLocusValueAttributeObject(523, "LocusValue_rs1801673")
addLocusValueAttributeObject(524, "LocusValue_rs17879961")
addLocusValueAttributeObject(525, "LocusValue_rs182549")
addLocusValueAttributeObject(526, "LocusValue_rs4988235")
addLocusValueAttributeObject(527, "LocusValue_rs7349332")
addLocusValueAttributeObject(528, "LocusValue_rs11803731")
addLocusValueAttributeObject(529, "LocusValue_rs17646946")
addLocusValueAttributeObject(530, "LocusValue_rs11571747")
addLocusValueAttributeObject(531, "LocusValue_rs7779616")
addLocusValueAttributeObject(532, "LocusValue_rs892839")
addLocusValueAttributeObject(533, "LocusValue_rs1003719")
addLocusValueAttributeObject(534, "LocusValue_rs7617069")
addLocusValueAttributeObject(535, "LocusValue_rs7174027")
addLocusValueAttributeObject(536, "LocusValue_rs989869")
addLocusValueAttributeObject(537, "LocusValue_rs2342494")
addLocusValueAttributeObject(538, "LocusValue_rs1158810")
addLocusValueAttributeObject(539, "LocusValue_rs1800414")
addLocusValueAttributeObject(540, "LocusValue_rs1540771")
addLocusValueAttributeObject(541, "LocusValue_rs26722")
addLocusValueAttributeObject(542, "LocusValue_rs1939707")
addLocusValueAttributeObject(543, "LocusValue_rs1800401")
addLocusValueAttributeObject(544, "LocusValue_rs17184180")
addLocusValueAttributeObject(545, "LocusValue_rs35051352")
addLocusValueAttributeObject(546, "LocusValue_rs1800422")
addLocusValueAttributeObject(547, "LocusValue_rs784416")
addLocusValueAttributeObject(548, "LocusValue_rs7803030")
addLocusValueAttributeObject(549, "LocusValue_rs16977009")
addLocusValueAttributeObject(550, "LocusValue_rs622330")
addLocusValueAttributeObject(551, "LocusValue_rs16863422")
addLocusValueAttributeObject(552, "LocusValue_rs12896399")
addLocusValueAttributeObject(553, "LocusValue_rs2422239")
addLocusValueAttributeObject(554, "LocusValue_rs7495174")
addLocusValueAttributeObject(555, "LocusValue_rs13016869")
addLocusValueAttributeObject(556, "LocusValue_rs2835630")
addLocusValueAttributeObject(557, "LocusValue_rs3809761")
addLocusValueAttributeObject(558, "LocusValue_rs11636232")
addLocusValueAttributeObject(559, "LocusValue_rs1805008")
addLocusValueAttributeObject(560, "LocusValue_rs3212368")
addLocusValueAttributeObject(561, "LocusValue_rs894883")
addLocusValueAttributeObject(562, "LocusValue_rs10266101")
addLocusValueAttributeObject(563, "LocusValue_rs911015")
addLocusValueAttributeObject(564, "LocusValue_rs974448")
addLocusValueAttributeObject(565, "LocusValue_rs6950754")
addLocusValueAttributeObject(566, "LocusValue_rs28777")
addLocusValueAttributeObject(567, "LocusValue_rs11855019")
addLocusValueAttributeObject(568, "LocusValue_rs1042602")
addLocusValueAttributeObject(569, "LocusValue_rs1887276")
addLocusValueAttributeObject(570, "LocusValue_rs147068120")
addLocusValueAttributeObject(571, "LocusValue_rs9971729")
addLocusValueAttributeObject(572, "LocusValue_rs4911442")
addLocusValueAttributeObject(573, "LocusValue_rs6910861")
addLocusValueAttributeObject(574, "LocusValue_rs12543326")
addLocusValueAttributeObject(575, "LocusValue_rs10424065")
addLocusValueAttributeObject(576, "LocusValue_rs1978859")
addLocusValueAttributeObject(577, "LocusValue_rs6462562")
addLocusValueAttributeObject(578, "LocusValue_rs6020957")
addLocusValueAttributeObject(579, "LocusValue_rs2733832")
addLocusValueAttributeObject(580, "LocusValue_rs8039195")
addLocusValueAttributeObject(581, "LocusValue_rs2034128")
addLocusValueAttributeObject(582, "LocusValue_rs4353811")
addLocusValueAttributeObject(583, "LocusValue_rs7965082")
addLocusValueAttributeObject(584, "LocusValue_rs10265937")
addLocusValueAttributeObject(585, "LocusValue_rs12437560")
addLocusValueAttributeObject(586, "LocusValue_rs1019212")
addLocusValueAttributeObject(587, "LocusValue_rs805693")
addLocusValueAttributeObject(588, "LocusValue_rs6828137")
addLocusValueAttributeObject(589, "LocusValue_rs805694")
addLocusValueAttributeObject(590, "LocusValue_rs397723")
addLocusValueAttributeObject(591, "LocusValue_rs62330021")
addLocusValueAttributeObject(592, "LocusValue_rs1572037")
addLocusValueAttributeObject(593, "LocusValue_rs7219915")
addLocusValueAttributeObject(594, "LocusValue_rs112747614")
addLocusValueAttributeObject(595, "LocusValue_rs10237838")
addLocusValueAttributeObject(596, "LocusValue_rs138777265")
addLocusValueAttributeObject(597, "LocusValue_rs6918152")
addLocusValueAttributeObject(598, "LocusValue_rs3212369")
addLocusValueAttributeObject(599, "LocusValue_rs1005999")
addLocusValueAttributeObject(600, "LocusValue_rs1393350")
addLocusValueAttributeObject(601, "LocusValue_rs7176696")
addLocusValueAttributeObject(602, "LocusValue_rs4778241")
addLocusValueAttributeObject(603, "LocusValue_rs3940272")
addLocusValueAttributeObject(604, "LocusValue_rs2835621")
addLocusValueAttributeObject(605, "LocusValue_rs2034127")
addLocusValueAttributeObject(606, "LocusValue_rs9858909")
addLocusValueAttributeObject(607, "LocusValue_rs6020940")
addLocusValueAttributeObject(608, "LocusValue_rs2168809")
addLocusValueAttributeObject(609, "LocusValue_rs4433629")
addLocusValueAttributeObject(610, "LocusValue_rs16977002")
addLocusValueAttributeObject(611, "LocusValue_rs10843104")
addLocusValueAttributeObject(612, "LocusValue_rs3794604")
addLocusValueAttributeObject(613, "LocusValue_rs2854746")
addLocusValueAttributeObject(614, "LocusValue_rs10237488")
addLocusValueAttributeObject(615, "LocusValue_rs9971100")
addLocusValueAttributeObject(616, "LocusValue_rs2095645")
addLocusValueAttributeObject(617, "LocusValue_rs2385028")
addLocusValueAttributeObject(618, "LocusValue_rs6997494")
addLocusValueAttributeObject(619, "LocusValue_rs2422241")
addLocusValueAttributeObject(620, "LocusValue_rs6039272")
addLocusValueAttributeObject(621, "LocusValue_rs1105879")
addLocusValueAttributeObject(622, "LocusValue_rs4911414")
addLocusValueAttributeObject(623, "LocusValue_rs72928978")
addLocusValueAttributeObject(624, "LocusValue_rs73488486")
addLocusValueAttributeObject(625, "LocusValue_rs141318671")
addLocusValueAttributeObject(626, "LocusValue_rs4778211")
addLocusValueAttributeObject(627, "LocusValue_rs10237319")
addLocusValueAttributeObject(628, "LocusValue_rs4793389")
addLocusValueAttributeObject(629, "LocusValue_rs7183877")
addLocusValueAttributeObject(630, "LocusValue_rs12552712")
addLocusValueAttributeObject(631, "LocusValue_rs7628370")
addLocusValueAttributeObject(632, "LocusValue_rs1562005")
addLocusValueAttributeObject(633, "LocusValue_rs1015092")
addLocusValueAttributeObject(634, "LocusValue_rs7214306")
addLocusValueAttributeObject(635, "LocusValue_rs6056126")
addLocusValueAttributeObject(636, "LocusValue_rs11957757")
addLocusValueAttributeObject(637, "LocusValue_rs805722")
addLocusValueAttributeObject(638, "LocusValue_rs7277820")
addLocusValueAttributeObject(639, "LocusValue_rs12821256")
addLocusValueAttributeObject(640, "LocusValue_rs7552331")
addLocusValueAttributeObject(641, "LocusValue_rs17447439")
addLocusValueAttributeObject(642, "LocusValue_rs3935591")
addLocusValueAttributeObject(643, "LocusValue_rs3768056")
addLocusValueAttributeObject(644, "LocusValue_rs12913832")
addLocusValueAttributeObject(645, "LocusValue_rs7640340")
addLocusValueAttributeObject(646, "LocusValue_rs12155314")
addLocusValueAttributeObject(647, "LocusValue_rs9782955")
addLocusValueAttributeObject(648, "LocusValue_rs351385")
addLocusValueAttributeObject(649, "LocusValue_rs4790309")
addLocusValueAttributeObject(650, "LocusValue_rs937171")
addLocusValueAttributeObject(651, "LocusValue_rs4552364")
addLocusValueAttributeObject(652, "LocusValue_rs11191909")
addLocusValueAttributeObject(653, "LocusValue_rs728405")
addLocusValueAttributeObject(654, "LocusValue_rs1325127")
addLocusValueAttributeObject(655, "LocusValue_rs72777200")
addLocusValueAttributeObject(656, "LocusValue_rs2762462")
addLocusValueAttributeObject(657, "LocusValue_rs6749293")
addLocusValueAttributeObject(658, "LocusValue_rs7807181")
addLocusValueAttributeObject(659, "LocusValue_rs7966317")
addLocusValueAttributeObject(660, "LocusValue_rs2238289")
addLocusValueAttributeObject(661, "LocusValue_rs16891982")
addLocusValueAttributeObject(662, "LocusValue_rs2748901")
addLocusValueAttributeObject(663, "LocusValue_rs4053148")
addLocusValueAttributeObject(664, "LocusValue_rs116359091")
addLocusValueAttributeObject(665, "LocusValue_rs1129038")
addLocusValueAttributeObject(666, "LocusValue_rs7516150")
addLocusValueAttributeObject(667, "LocusValue_rs4648379")
addLocusValueAttributeObject(668, "LocusValue_rs13097965")
addLocusValueAttributeObject(669, "LocusValue_rs11237982")
addLocusValueAttributeObject(670, "LocusValue_rs2252893")
addLocusValueAttributeObject(671, "LocusValue_rs12906280")
addLocusValueAttributeObject(672, "LocusValue_rs11604811")
addLocusValueAttributeObject(673, "LocusValue_rs12335410")
addLocusValueAttributeObject(674, "LocusValue_rs6555969")
addLocusValueAttributeObject(675, "LocusValue_rs6478394")
addLocusValueAttributeObject(676, "LocusValue_rs2274107")
addLocusValueAttributeObject(677, "LocusValue_rs74409360")
addLocusValueAttributeObject(678, "LocusValue_rs10278187")
addLocusValueAttributeObject(679, "LocusValue_rs4633993")
addLocusValueAttributeObject(680, "LocusValue_rs2832438")
addLocusValueAttributeObject(681, "LocusValue_rs2894450")
addLocusValueAttributeObject(682, "LocusValue_rs875143")
addLocusValueAttributeObject(683, "LocusValue_rs916977")
addLocusValueAttributeObject(684, "LocusValue_rs341147")
addLocusValueAttributeObject(685, "LocusValue_rs1999527")
addLocusValueAttributeObject(686, "LocusValue_rs10234405")
addLocusValueAttributeObject(687, "LocusValue_rs2327101")
addLocusValueAttributeObject(688, "LocusValue_rs8028689")
addLocusValueAttributeObject(689, "LocusValue_rs717463")
addLocusValueAttributeObject(690, "LocusValue_rs8079498")
addLocusValueAttributeObject(691, "LocusValue_rs12593929")
addLocusValueAttributeObject(692, "LocusValue_rs12203592")
addLocusValueAttributeObject(693, "LocusValue_rs4521336")
addLocusValueAttributeObject(694, "LocusValue_rs1834640")
addLocusValueAttributeObject(695, "LocusValue_rs13098099")
addLocusValueAttributeObject(696, "LocusValue_rs975633")
addLocusValueAttributeObject(697, "LocusValue_rs13297008")
addLocusValueAttributeObject(698, "LocusValue_rs2240203")
addLocusValueAttributeObject(699, "LocusValue_rs3829241")
addLocusValueAttributeObject(700, "LocusValue_rs12694574")
addLocusValueAttributeObject(701, "LocusValue_rs2034129")
addLocusValueAttributeObject(702, "LocusValue_rs1800407")
addLocusValueAttributeObject(703, "LocusValue_rs348613")
addLocusValueAttributeObject(704, "LocusValue_rs7182710")
addLocusValueAttributeObject(705, "LocusValue_rs142317543")
addLocusValueAttributeObject(706, "LocusValue_rs7781059")
addLocusValueAttributeObject(707, "LocusValue_rs4778138")
addLocusValueAttributeObject(708, "LocusValue_rs1126809")
addLocusValueAttributeObject(709, "LocusValue_rs1408799")
addLocusValueAttributeObject(710, "LocusValue_rs1562006")
addLocusValueAttributeObject(711, "LocusValue_rs12452184")
addLocusValueAttributeObject(712, "LocusValue_rs10209564")
addLocusValueAttributeObject(713, "LocusValue_rs12913823")
addLocusValueAttributeObject(714, "LocusValue_rs11631797")
addLocusValueAttributeObject(715, "LocusValue_rs6944702")
addLocusValueAttributeObject(716, "LocusValue_rs6693258")
addLocusValueAttributeObject(717, "LocusValue_rs642742")
addLocusValueAttributeObject(718, "LocusValue_rs6795519")
addLocusValueAttributeObject(719, "LocusValue_rs6039266")
addLocusValueAttributeObject(720, "LocusValue_rs2070959")
addLocusValueAttributeObject(721, "LocusValue_rs6420484")
addLocusValueAttributeObject(722, "LocusValue_rs2835660")
addLocusValueAttributeObject(723, "LocusValue_rs12358982")
addLocusValueAttributeObject(724, "LocusValue_rs16977008")
addLocusValueAttributeObject(725, "LocusValue_rs1667394")
addLocusValueAttributeObject(726, "LocusValue_rs1426654")
addLocusValueAttributeObject(727, "LocusValue_rs1939697")
addLocusValueAttributeObject(728, "LocusValue_rs7170852")
addLocusValueAttributeObject(729, "LocusValue_rs121908120")
addLocusValueAttributeObject(730, "LocusValue_rs2327089")
addLocusValueAttributeObject(731, "LocusValue_rs911020")
addLocusValueAttributeObject(732, "LocusValue_rs6058017")
addLocusValueAttributeObject(733, "LocusValue_rs6462544")
addLocusValueAttributeObject(734, "LocusValue_rs2108166")
addLocusValueAttributeObject(735, "LocusValue_rs17252053")
addLocusValueAttributeObject(736, "LocusValue_rs9301973")
addLocusValueAttributeObject(737, "LocusValue_rs35264875")
addLocusValueAttributeObject(738, "LocusValue_rs9894429")
addLocusValueAttributeObject(739, "LocusValue_rs10485860")
addLocusValueAttributeObject(740, "LocusValue_rs1008591")
addLocusValueAttributeObject(741, "LocusValue_rs6056119")
addLocusValueAttributeObject(742, "LocusValue_rs3912104")
addLocusValueAttributeObject(743, "LocusValue_rs790464")
addLocusValueAttributeObject(744, "LocusValue_rs4778218")
addLocusValueAttributeObject(745, "LocusValue_rs1747677")
addLocusValueAttributeObject(746, "LocusValue_rs6056066")
addLocusValueAttributeObject(747, "LocusValue_rs12614022")
addLocusValueAttributeObject(748, "LocusValue_rs7799331")
addLocusValueAttributeObject(749, "LocusValue_rs1805007")
addLocusValueAttributeObject(750, "LocusValue_rs4648477")
addLocusValueAttributeObject(751, "LocusValue_rs4648478")
addLocusValueAttributeObject(752, "LocusValue_rs9692219")
getMandatoryAttributeFunction := func(profileVersion int)([]string, error){
if (profileVersion != 1){
return nil, errors.New("Trying to retrieve mandatory attributes for unknown profile version.")
}
mandatoryAttributesList := []string{mandatoryAttributeName}
return mandatoryAttributesList, nil
}
attributeObject_LocusIsPhased := AttributeObject{
ProfileVersions: []int{1},
AttributeIdentifier: attributeIdentifier,
AttributeName: attributeName,
GetIsRequired: getIsRequired_No,
GetMandatoryAttributes: getMandatoryAttributeFunction,
GetProfileTypes: getProfileTypes_Mate,
GetIsCanonical: getIsCanonical_Always,
CheckValueFunction: checkValueFunction_MateYesOrNo,
}
attributeObjectsList = append(attributeObjectsList, attributeObject_LocusIsPhased)
}
// Locus attribute identifiers start at 10000.
// Each rsID consumes two consecutive identifiers:
// -The first is for its LocusValue attribute
// -The second is for its LocusIsPhased attribute
// Identifiers are assigned in the order of shareableRSIDsList.
index := 10000
for _, rsID := range shareableRSIDsList{
rsIDString := helpers.ConvertInt64ToString(rsID)
locusValueAttributeName := "LocusValue_rs" + rsIDString
locusIsPhasedAttributeName := "LocusIsPhased_rs" + rsIDString
// The third argument is the attribute's mandatory attribute name.
// The two attributes name each other, so each lists the other as its mandatory attribute.
addLocusValueAttributeObject(index, locusValueAttributeName, locusIsPhasedAttributeName)
index += 1
addLocusIsPhasedAttributeObject(index, locusIsPhasedAttributeName, locusValueAttributeName)
index += 1
}
profileAttributeObjectsList = attributeObjectsList
return nil
}

View file

@ -12,7 +12,14 @@ import "strings"
func TestProfileFormat(t *testing.T){
err := profileFormat.InitializeProfileFormatVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
err := traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
err = profileFormat.InitializeProfileFormatVariables()
if (err != nil){
t.Fatalf("Failed to initialize profile format variables: " + err.Error())
}
@ -218,7 +225,10 @@ func TestProfileGeneticReferences(t *testing.T){
}
}
traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil) {
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
traitObjectsList, err := traits.GetTraitObjectsList()
if (err != nil){

View file

@ -35,8 +35,8 @@ var predictionModel_EyeColor []byte
//go:embed predictionModels/LactoseToleranceModel.gob
var predictionModel_LactoseTolerance []byte
// The files returned by this function are .gob encoded geneticPrediction.TraitPredictionAccuracyInfoMap objects
func GetPredictionModelTraitAccuracyInfoBytes(traitName string)([]byte, error){
// The files returned by this function are .gob encoded geneticPrediction.DiscreteTraitPredictionAccuracyInfoMap objects
func GetPredictionModelDiscreteTraitAccuracyInfoBytes(traitName string)([]byte, error){
switch traitName{
case "Eye Color":{

View file

@ -28,18 +28,18 @@ func TestGeneticPredictionModels(t *testing.T){
func TestGeneticPredictionModelAccuracies(t *testing.T){
traitNamesList := []string{"Eye Color", "Lactose Tolerance"}
discreteTraitNamesList := []string{"Eye Color", "Lactose Tolerance"}
for _, traitName := range traitNamesList{
for _, traitName := range discreteTraitNamesList{
accuracyInfoBytes, err := geneticPredictionModels.GetPredictionModelTraitAccuracyInfoBytes(traitName)
accuracyInfoBytes, err := geneticPredictionModels.GetPredictionModelDiscreteTraitAccuracyInfoBytes(traitName)
if (err != nil){
t.Fatalf("GetGeneticPredictionModelBytes failed: " + err.Error())
t.Fatalf("GetPredictionModelDiscreteTraitAccuracyInfoBytes failed: " + err.Error())
}
_, err = geneticPrediction.DecodeBytesToTraitPredictionAccuracyInfoMap(accuracyInfoBytes)
_, err = geneticPrediction.DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap(accuracyInfoBytes)
if (err != nil){
t.Fatalf("DecodeBytesToTraitPredictionAccuracyInfoMap failed: " + err.Error())
t.Fatalf("DecodeBytesToDiscreteTraitPredictionAccuracyInfoMap failed: " + err.Error())
}
}
}

View file

@ -296,7 +296,10 @@ func TestGeneticReferences(t *testing.T){
}
}
traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil){
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
}
traitObjectsList, err := traits.GetTraitObjectsList()
if (err != nil){
@ -499,12 +502,10 @@ func TestGeneticReferences(t *testing.T){
//
// We only care about alias collisions within each company.
// Multiple companies can refer to the same location with the same alias.
//
// companyAliasStruct pairs a genetics company with one alias that company uses for a locus.
// Keeping the company alongside the alias means the same alias string used by two
// different companies yields two distinct values.
// NOTE(review): presumably used as a map key for detecting duplicate aliases within a company - confirm against the rest of the test.
type companyAliasStruct struct{
// The genetics company which uses the alias
geneticsCompany locusMetadata.GeneticsCompany
// The alias the company uses to refer to a locus
locusAlias string
}
@ -519,6 +520,8 @@ func TestGeneticReferences(t *testing.T){
rsidsList := locusMetadataObject.RSIDsList
locusChromosome := locusMetadataObject.Chromosome
locusPosition := locusMetadataObject.Position
geneInfoIsKnown := locusMetadataObject.GeneInfoIsKnown
geneExists := locusMetadataObject.GeneExists
geneNamesList := locusMetadataObject.GeneNamesList
locusCompanyAliasesMap := locusMetadataObject.CompanyAliases
referencesMap := locusMetadataObject.References
@ -541,8 +544,8 @@ func TestGeneticReferences(t *testing.T){
_, exists := locusMetadataRSIDsMap[rsID]
if (exists == true){
RSIDString := helpers.ConvertInt64ToString(rsID)
t.Fatalf("locusMetadataObjectsList contains duplicate RSID: " + RSIDString)
rsidString := helpers.ConvertInt64ToString(rsID)
t.Fatalf("locusMetadataObjectsList contains duplicate RSID: " + rsidString)
}
locusMetadataRSIDsMap[rsID] = struct{}{}
@ -580,7 +583,12 @@ func TestGeneticReferences(t *testing.T){
locusPositionsMap[locusPositionObject] = struct{}{}
if (len(geneNamesList) != 0){
if (geneInfoIsKnown == true && geneExists == true){
if (len(geneNamesList) == 0){
t.Fatalf("locusMetadataObjectsList contains locus with known gene and empty geneNamesList.")
}
for _, geneName := range geneNamesList{
if (geneName == ""){
t.Fatalf("locusMetadataObjectsList contains locus with empty geneName in geneNamesList.")
@ -593,7 +601,6 @@ func TestGeneticReferences(t *testing.T){
for _, locusCompanyAlias := range companyAliasesList{
companyAliasObject := companyAliasStruct{
geneticsCompany: companyObject,
locusAlias: locusCompanyAlias,
}
@ -613,6 +620,8 @@ func TestGeneticReferences(t *testing.T){
}
}
//TODO: Check to make sure that there are no identical company aliases for different loci
missingLociList := make([]int64, 0)
for rsID, _ := range allRSIDsMap{

View file

@ -1,182 +0,0 @@
[
{
"RSIDsList": [
17646946
],
"Chromosome": 1,
"Position": 152090291,
"GeneNamesList": [
"TCHHL1"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs17646946": "https://www.snpedia.com/index.php/Rs17646946"
}
},
{
"RSIDsList": [
11803731
],
"Chromosome": 1,
"Position": 152110849,
"GeneNamesList": [
"TCHH"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs11803731": "https://www.snpedia.com/index.php/Rs11803731"
}
},
{
"RSIDsList": [
4648379
],
"Chromosome": 1,
"Position": 3261516,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - Appearance": "https://www.snpedia.com/index.php/Appearance"
}
},
{
"RSIDsList": [
1999527
],
"Chromosome": 1,
"Position": 3256108,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7516150
],
"Chromosome": 1,
"Position": 3253889,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7552331
],
"Chromosome": 1,
"Position": 3253941,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
9782955
],
"Chromosome": 1,
"Position": 236039877,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
3768056
],
"Chromosome": 1,
"Position": 235907825,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
351385
],
"Chromosome": 1,
"Position": 212421629,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1572037
],
"Chromosome": 1,
"Position": 3254369,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6693258,
56426910
],
"Chromosome": 1,
"Position": 9106285,
"GeneNamesList": [
"GPR157"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4648477
],
"Chromosome": 1,
"Position": 3335411,
"GeneNamesList": [
"PRDM16"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4648478,
56579652,
58636362
],
"Chromosome": 1,
"Position": 3335443,
"GeneNamesList": [
"PRDM16"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2385028,
35558782,
4660119,
4428879
],
"Chromosome": 1,
"Position": 235872505,
"GeneNamesList": [
"LYST"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,97 +0,0 @@
[
{
"RSIDsList": [
2274107
],
"Chromosome": 10,
"Position": 105838703,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1747677
],
"Chromosome": 10,
"Position": 105815241,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
805722
],
"Chromosome": 10,
"Position": 105810400,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
805693
],
"Chromosome": 10,
"Position": 105815324,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12358982
],
"Chromosome": 10,
"Position": 104094571,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
805694
],
"Chromosome": 10,
"Position": 104055696,
"GeneNamesList": [
"COL17A1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
11191909
],
"Chromosome": 10,
"Position": 104053243,
"GeneNamesList": [
"COL17A1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
9971100,
10883964
],
"Chromosome": 10,
"Position": 104066661,
"GeneNamesList": [
"COL17A1"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,280 +0,0 @@
[
{
"RSIDsList": [
4987945,
2227924
],
"Chromosome": 11,
"Position": 108251865,
"GeneNamesList": [
"ATM"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs4987945": "https://www.snpedia.com/index.php/Rs4987945"
}
},
{
"RSIDsList": [
3218695
],
"Chromosome": 11,
"Position": 108259051,
"GeneNamesList": [
"ATM"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs3218695": "https://www.snpedia.com/index.php/Rs3218695"
}
},
{
"RSIDsList": [
3218707
],
"Chromosome": 11,
"Position": 108244000,
"GeneNamesList": [
"ATM"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs3218707": "https://www.snpedia.com/index.php/Rs3218707"
}
},
{
"RSIDsList": [
334,
77121243
],
"Chromosome": 11,
"Position": 5227002,
"GeneNamesList": [
"HBB"
],
"CompanyAliases": {
"1": [
"i3003137"
]
},
"References": {
"SNPedia.com - rs334": "https://www.snpedia.com/index.php/Rs334"
}
},
{
"RSIDsList": [
1801673
],
"Chromosome": 11,
"Position": 108304736,
"GeneNamesList": [
"ATM"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs1801673": "https://www.snpedia.com/index.php/Rs1801673"
}
},
{
"RSIDsList": [
1800056
],
"Chromosome": 11,
"Position": 108267276,
"GeneNamesList": [
"ATM"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs1800056": "https://www.snpedia.com/index.php/Rs1800056"
}
},
{
"RSIDsList": [
1800057
],
"Chromosome": 11,
"Position": 108272729,
"GeneNamesList": [
"ATM"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs1800057": "https://www.snpedia.com/index.php/Rs1800057"
}
},
{
"RSIDsList": [
4986761
],
"Chromosome": 11,
"Position": 108254034,
"GeneNamesList": [
"ATM"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs4986761": "https://www.snpedia.com/index.php/Rs4986761"
}
},
{
"RSIDsList": [
3092856
],
"Chromosome": 11,
"Position": 108289005,
"GeneNamesList": [
"ATM"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs3092856": "https://www.snpedia.com/index.php/Rs3092856"
}
},
{
"RSIDsList": [
1800058
],
"Chromosome": 11,
"Position": 108289623,
"GeneNamesList": [
"ATM"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs1800058": "https://www.snpedia.com/index.php/Rs1800058"
}
},
{
"RSIDsList": [
11237982
],
"Chromosome": 11,
"Position": 79441694,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1939707
],
"Chromosome": 11,
"Position": 100102098,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1042602
],
"Chromosome": 11,
"Position": 88911696,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1393350
],
"Chromosome": 11,
"Position": 89011046,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1126809
],
"Chromosome": 11,
"Position": 89017961,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
11604811
],
"Chromosome": 11,
"Position": 72389984,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
3829241
],
"Chromosome": 11,
"Position": 68855363,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
35264875
],
"Chromosome": 11,
"Position": 68846399,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1939697
],
"Chromosome": 11,
"Position": 100091693,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1800422
],
"Chromosome": 11,
"Position": 89284793,
"GeneNamesList": [
"TYR"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
72928978
],
"Chromosome": 11,
"Position": 69063896,
"GeneNamesList": [
"TPCN2"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,137 +0,0 @@
[
{
"RSIDsList": [
34330
],
"Chromosome": 12,
"Position": 12717761,
"GeneNamesList": [
"CDKN1B",
"GPR19"
],
"CompanyAliases": {},
"References": {
"SNPedia - rs34330": "https://www.snpedia.com/index.php/Rs34330"
}
},
{
"RSIDsList": [
17252053
],
"Chromosome": 12,
"Position": 85727948,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1887276
],
"Chromosome": 12,
"Position": 100797485,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4433629
],
"Chromosome": 12,
"Position": 90341455,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10843104
],
"Chromosome": 12,
"Position": 28276626,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12821256
],
"Chromosome": 12,
"Position": 89328335,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7965082
],
"Chromosome": 12,
"Position": 100800193,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
9971729
],
"Chromosome": 12,
"Position": 23979791,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
642742
],
"Chromosome": 12,
"Position": 89299746,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7966317
],
"Chromosome": 12,
"Position": 100795311,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
790464
],
"Chromosome": 12,
"Position": 92174057,
"GeneNamesList": [
"BTG1-DT"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,162 +0,0 @@
[
{
"RSIDsList": [
11571746
],
"Chromosome": 13,
"Position": 32370971,
"GeneNamesList": [
"BRCA2"
],
"CompanyAliases": {
"1": [
"i5009299"
]
},
"References": {
"SNPedia.com - rs11571746": "https://www.snpedia.com/index.php/Rs11571746"
}
},
{
"RSIDsList": [
11571747
],
"Chromosome": 13,
"Position": 32371035,
"GeneNamesList": [
"BRCA2"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs11571747": "https://www.snpedia.com/index.php/Rs11571747"
}
},
{
"RSIDsList": [
766173
],
"Chromosome": 13,
"Position": 32332343,
"GeneNamesList": [
"BRCA2"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs766173": "https://www.snpedia.com/index.php/Rs766173"
}
},
{
"RSIDsList": [
1801426
],
"Chromosome": 13,
"Position": 32398747,
"GeneNamesList": [
"BRCA2"
],
"CompanyAliases": {
"1": [
"i5009256"
]
},
"References": {
"SNPedia.com - rs1801426": "https://www.snpedia.com/index.php/Rs1801426"
}
},
{
"RSIDsList": [
4987117
],
"Chromosome": 13,
"Position": 32340099,
"GeneNamesList": [
"BRCA2"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs4987117": "https://www.snpedia.com/index.php/Rs4987117"
}
},
{
"RSIDsList": [
1799954
],
"Chromosome": 13,
"Position": 32340455,
"GeneNamesList": [
"BRCA2"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs1799954": "https://www.snpedia.com/index.php/Rs1799954"
}
},
{
"RSIDsList": [
144848
],
"Chromosome": 13,
"Position": 32332592,
"GeneNamesList": [
"BRCA2"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs144848": "https://www.snpedia.com/index.php/Rs144848"
}
},
{
"RSIDsList": [
4987047
],
"Chromosome": 13,
"Position": 32379392,
"GeneNamesList": [
"BRCA2"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs4987047": "https://www.snpedia.com/index.php/Rs4987047"
}
},
{
"RSIDsList": [
11571833
],
"Chromosome": 13,
"Position": 32398489,
"GeneNamesList": [
"BRCA2"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs11571833": "https://www.snpedia.com/index.php/Rs11571833"
}
},
{
"RSIDsList": [
2095645
],
"Chromosome": 13,
"Position": 74178399,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
9301973,
61272261,
17254025
],
"Chromosome": 13,
"Position": 94537147,
"GeneNamesList": [
"DCT"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,36 +0,0 @@
[
{
"RSIDsList": [
12896399
],
"Chromosome": 14,
"Position": 92773663,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
17184180
],
"Chromosome": 14,
"Position": 92780387,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
138777265
],
"Chromosome": 14,
"Position": 68769419,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,447 +0,0 @@
[
{
"RSIDsList": [
7183877
],
"Chromosome": 15,
"Position": 28365733,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1800407
],
"Chromosome": 15,
"Position": 28230318,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1129038
],
"Chromosome": 15,
"Position": 28356859,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7495174
],
"Chromosome": 15,
"Position": 28344238,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7174027
],
"Chromosome": 15,
"Position": 28328765,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1800414
],
"Chromosome": 15,
"Position": 28197037,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2240203
],
"Chromosome": 15,
"Position": 28494202,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4778218
],
"Chromosome": 15,
"Position": 28211758,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4778211
],
"Chromosome": 15,
"Position": 28199305,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
728405
],
"Chromosome": 15,
"Position": 28199853,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
8028689
],
"Chromosome": 15,
"Position": 28488888,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12906280
],
"Chromosome": 15,
"Position": 30265887,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
3935591
],
"Chromosome": 15,
"Position": 28374012,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1667394
],
"Chromosome": 15,
"Position": 28530182,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1800401
],
"Chromosome": 15,
"Position": 28260053,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12913823
],
"Chromosome": 15,
"Position": 50509591,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1426654
],
"Chromosome": 15,
"Position": 48426484,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12913832
],
"Chromosome": 15,
"Position": 28365618,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12593929
],
"Chromosome": 15,
"Position": 28359258,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
916977
],
"Chromosome": 15,
"Position": 28513364,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
11636232
],
"Chromosome": 15,
"Position": 28386626,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4778241
],
"Chromosome": 15,
"Position": 28338713,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
8039195
],
"Chromosome": 15,
"Position": 28516084,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
3794604
],
"Chromosome": 15,
"Position": 28272065,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
989869
],
"Chromosome": 15,
"Position": 28006306,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1834640
],
"Chromosome": 15,
"Position": 48392165,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7170852
],
"Chromosome": 15,
"Position": 28427986,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4778138
],
"Chromosome": 15,
"Position": 28335820,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
784416
],
"Chromosome": 15,
"Position": 49012925,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7176696
],
"Chromosome": 15,
"Position": 49073903,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
3940272
],
"Chromosome": 15,
"Position": 28468723,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2238289
],
"Chromosome": 15,
"Position": 28453215,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
937171
],
"Chromosome": 15,
"Position": 50194749,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
11631797
],
"Chromosome": 15,
"Position": 28502279,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12437560
],
"Chromosome": 15,
"Position": 61832507,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
11855019,
59065625
],
"Chromosome": 15,
"Position": 28090674,
"GeneNamesList": [
"OCA2"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7182710,
17466298,
61298156
],
"Chromosome": 15,
"Position": 48812737,
"GeneNamesList": [
"CEP152"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,50 +0,0 @@
[
{
"RSIDsList": [
1805007
],
"Chromosome": 16,
"Position": 89986117,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1805008
],
"Chromosome": 16,
"Position": 89986144,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
3212369
],
"Chromosome": 16,
"Position": 89986760,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
3212368
],
"Chromosome": 16,
"Position": 89920224,
"GeneNamesList": [
"MC1R"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,214 +0,0 @@
[
{
"RSIDsList": [
1799966
],
"Chromosome": 17,
"Position": 43071077,
"GeneNamesList": [
"BRCA1"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs1799966": "https://www.snpedia.com/index.php/Rs1799966"
}
},
{
"RSIDsList": [
1799950
],
"Chromosome": 17,
"Position": 43094464,
"GeneNamesList": [
"BRCA1"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs1799950": "https://www.snpedia.com/index.php/Rs1799950"
}
},
{
"RSIDsList": [
2227945
],
"Chromosome": 17,
"Position": 43092113,
"GeneNamesList": [
"BRCA1"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs2227945": "https://www.snpedia.com/index.php/Rs2227945"
}
},
{
"RSIDsList": [
16942
],
"Chromosome": 17,
"Position": 43091983,
"GeneNamesList": [
"BRCA1"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs16942": "https://www.snpedia.com/index.php/Rs16942"
}
},
{
"RSIDsList": [
4986850
],
"Chromosome": 17,
"Position": 43093454,
"GeneNamesList": [
"BRCA1"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs4986850": "https://www.snpedia.com/index.php/Rs4986850"
}
},
{
"RSIDsList": [
9894429
],
"Chromosome": 17,
"Position": 79596811,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12452184
],
"Chromosome": 17,
"Position": 79664426,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
16977009
],
"Chromosome": 17,
"Position": 69916524,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7219915
],
"Chromosome": 17,
"Position": 79591813,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
8079498
],
"Chromosome": 17,
"Position": 69919452,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
3809761
],
"Chromosome": 17,
"Position": 67497367,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
16977008
],
"Chromosome": 17,
"Position": 69916480,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
16977002
],
"Chromosome": 17,
"Position": 71919192,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6420484,
59590586,
17859003,
17846019
],
"Chromosome": 17,
"Position": 81645371,
"GeneNamesList": [
"TSPAN10"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4793389
],
"Chromosome": 17,
"Position": 71921776,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4790309,
58087488
],
"Chromosome": 17,
"Position": 2063595,
"GeneNamesList": [
"HIC1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7214306
],
"Chromosome": 17,
"Position": 71925130,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,62 +0,0 @@
[
{
"RSIDsList": [
1008591
],
"Chromosome": 19,
"Position": 46730614,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1019212,
58273978,
17660257
],
"Chromosome": 19,
"Position": 46225962,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
73488486
],
"Chromosome": 19,
"Position": 7516739,
"GeneNamesList": [
"ZNF358"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10424065
],
"Chromosome": 19,
"Position": 3545024,
"GeneNamesList": [
"MFSD12"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
142317543
],
"Chromosome": 19,
"Position": 3547687,
"GeneNamesList": [
"MFSD12"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,276 +0,0 @@
[
{
"RSIDsList": [
182549
],
"Chromosome": 2,
"Position": 135859184,
"GeneNamesList": [
"MCM6"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs182549": "https://www.snpedia.com/index.php/Rs182549"
}
},
{
"RSIDsList": [
1045485
],
"Chromosome": 2,
"Position": 201284866,
"GeneNamesList": [
"CASP8"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs1045485": "https://www.snpedia.com/index.php/Rs1045485"
}
},
{
"RSIDsList": [
4988235
],
"Chromosome": 2,
"Position": 135851076,
"GeneNamesList": [
"MCM6"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs4988235": "https://www.snpedia.com/index.php/Rs4988235"
}
},
{
"RSIDsList": [
7349332
],
"Chromosome": 2,
"Position": 218891661,
"GeneNamesList": [
"WNT10A"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs7349332": "https://www.snpedia.com/index.php/Rs7349332"
}
},
{
"RSIDsList": [
2422241
],
"Chromosome": 2,
"Position": 119043036,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
16863422
],
"Chromosome": 2,
"Position": 222990015,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12694574
],
"Chromosome": 2,
"Position": 222993733,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1105879
],
"Chromosome": 2,
"Position": 234602202,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
974448
],
"Chromosome": 2,
"Position": 223005314,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1005999
],
"Chromosome": 2,
"Position": 105523791,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2070959
],
"Chromosome": 2,
"Position": 234602191,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1978859
],
"Chromosome": 2,
"Position": 223082331,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2894450
],
"Chromosome": 2,
"Position": 222997104,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2422239
],
"Chromosome": 2,
"Position": 119029079,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
892839
],
"Chromosome": 2,
"Position": 239406446,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10209564
],
"Chromosome": 2,
"Position": 239459603,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
121908120
],
"Chromosome": 2,
"Position": 219755011,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12614022
],
"Chromosome": 2,
"Position": 222618951,
"GeneNamesList": [
"FARSB"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6749293
],
"Chromosome": 2,
"Position": 172302075,
"GeneNamesList": [
"LOC107985960"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
112747614
],
"Chromosome": 2,
"Position": 206085512,
"GeneNamesList": [
"INO80D"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
74409360
],
"Chromosome": 2,
"Position": 238367637,
"GeneNamesList": [
"TRAF3IP1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
13016869,
56853446,
56528773
],
"Chromosome": 2,
"Position": 46006242,
"GeneNamesList": [
"PRKCE"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,234 +0,0 @@
[
{
"RSIDsList": [
4053148
],
"Chromosome": 20,
"Position": 8772544,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4911414
],
"Chromosome": 20,
"Position": 32729444,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4911442
],
"Chromosome": 20,
"Position": 33355046,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2748901
],
"Chromosome": 20,
"Position": 4948248,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1015092
],
"Chromosome": 20,
"Position": 8750062,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
911020
],
"Chromosome": 20,
"Position": 49671946,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6058017
],
"Chromosome": 20,
"Position": 32856998,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2327089
],
"Chromosome": 20,
"Position": 8769180,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6020957
],
"Chromosome": 20,
"Position": 49687635,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2327101
],
"Chromosome": 20,
"Position": 8734263,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6039266
],
"Chromosome": 20,
"Position": 8766071,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6056066
],
"Chromosome": 20,
"Position": 8738169,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
975633
],
"Chromosome": 20,
"Position": 8765289,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4633993,
111186477
],
"Chromosome": 20,
"Position": 8789461,
"GeneNamesList": [
"PLCB1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
911015,
60505384
],
"Chromosome": 20,
"Position": 51073634,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6020940,
7271570
],
"Chromosome": 20,
"Position": 51058312,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6056119,
58122852,
6516401
],
"Chromosome": 20,
"Position": 8792648,
"GeneNamesList": [
"PLCB1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6056126,
59241198,
7260663
],
"Chromosome": 20,
"Position": 8795023,
"GeneNamesList": [
"PLCB1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6039272,
7269212
],
"Chromosome": 20,
"Position": 8792227,
"GeneNamesList": [
"PLCB1"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,98 +0,0 @@
[
{
"RSIDsList": [
2252893
],
"Chromosome": 21,
"Position": 38507572,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2835630
],
"Chromosome": 21,
"Position": 38521842,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1003719
],
"Chromosome": 21,
"Position": 38491095,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2835621
],
"Chromosome": 21,
"Position": 38510616,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2832438
],
"Chromosome": 21,
"Position": 31137937,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7277820
],
"Chromosome": 21,
"Position": 38580309,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2835660
],
"Chromosome": 21,
"Position": 37196581,
"GeneNamesList": [
"TTC3"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
622330
],
"Chromosome": 21,
"Position": 43363407,
"GeneNamesList": [
"LINC01679"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,41 +0,0 @@
[
{
"RSIDsList": [
17879961
],
"Chromosome": 22,
"Position": 28725099,
"GeneNamesList": [
"CHEK2"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs17879961": "https://www.snpedia.com/index.php/Rs17879961"
}
},
{
"RSIDsList": [
397723
],
"Chromosome": 22,
"Position": 48112790,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
35051352,
62226058
],
"Chromosome": 22,
"Position": 45973777,
"GeneNamesList": [
"WNT7B"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,231 +0,0 @@
[
{
"RSIDsList": [
4552364
],
"Chromosome": 3,
"Position": 88974863,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
717463
],
"Chromosome": 3,
"Position": 59372700,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
116359091
],
"Chromosome": 3,
"Position": 69980177,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6795519
],
"Chromosome": 3,
"Position": 59388206,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
9858909
],
"Chromosome": 3,
"Position": 88378348,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
13097965
],
"Chromosome": 3,
"Position": 184339757,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
17447439
],
"Chromosome": 3,
"Position": 189549423,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4353811
],
"Chromosome": 3,
"Position": 88981207,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7628370
],
"Chromosome": 3,
"Position": 59370600,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2034127
],
"Chromosome": 3,
"Position": 59368074,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2168809
],
"Chromosome": 3,
"Position": 88377746,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2034129
],
"Chromosome": 3,
"Position": 59368293,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2034128
],
"Chromosome": 3,
"Position": 59368259,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
894883
],
"Chromosome": 3,
"Position": 59373255,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
3912104
],
"Chromosome": 3,
"Position": 42720996,
"GeneNamesList": [
"CCDC13"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7617069
],
"Chromosome": 3,
"Position": 59384969,
"GeneNamesList": [
"CFAP20DC-DT"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
13098099,
60851446
],
"Chromosome": 3,
"Position": 184621879,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7640340,
61716056
],
"Chromosome": 3,
"Position": 59394285,
"GeneNamesList": [
"CFAP20DC-DT"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
875143,
61193087
],
"Chromosome": 3,
"Position": 59394645,
"GeneNamesList": [
"CFAP20DC-DT"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,37 +0,0 @@
[
{
"RSIDsList": [
6828137
],
"Chromosome": 4,
"Position": 90059434,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
141318671
],
"Chromosome": 4,
"Position": 58493393,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
4521336,
58489362
],
"Chromosome": 4,
"Position": 23937776,
"GeneNamesList": [
"PPARGC1A"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,96 +0,0 @@
[
{
"RSIDsList": [
11957757
],
"Chromosome": 5,
"Position": 148216187,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
16891982
],
"Chromosome": 5,
"Position": 33951693,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
348613
],
"Chromosome": 5,
"Position": 40273518,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6555969
],
"Chromosome": 5,
"Position": 171128464,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
26722
],
"Chromosome": 5,
"Position": 33963870,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
28777
],
"Chromosome": 5,
"Position": 33958959,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
72777200
],
"Chromosome": 5,
"Position": 124561295,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
62330021
],
"Chromosome": 5,
"Position": 311787,
"GeneNamesList": [
"PDCD6"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,66 +0,0 @@
[
{
"RSIDsList": [
6918152
],
"Chromosome": 6,
"Position": 542159,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1540771
],
"Chromosome": 6,
"Position": 466033,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12203592
],
"Chromosome": 6,
"Position": 396321,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6910861,
111318576,
63129962,
58859209
],
"Chromosome": 6,
"Position": 10537950,
"GeneNamesList": [
"GCNT2"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
341147,
614213
],
"Chromosome": 6,
"Position": 158420693,
"GeneNamesList": [
"TULP4"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,808 +0,0 @@
[
{
"RSIDsList": [
80034486
],
"Chromosome": 7,
"Position": 117652877,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i5012079",
"i4000311"
]
},
"References": {
"SNPedia.com - rs80034486": "https://www.snpedia.com/index.php/Rs80034486"
}
},
{
"RSIDsList": [
121908745
],
"Chromosome": 7,
"Position": 117559590,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs121908745": "https://www.snpedia.com/index.php/Rs121908745"
}
},
{
"RSIDsList": [
74551128
],
"Chromosome": 7,
"Position": 117548795,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000291",
"i5006050",
"i5011205"
]
},
"References": {
"SNPedia.com - rs74551128": "https://www.snpedia.com/index.php/Rs74551128"
}
},
{
"RSIDsList": [
75096551
],
"Chromosome": 7,
"Position": 117606754,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i5011728",
"i6056297",
"i4000321"
]
},
"References": {
"SNPedia.com - rs75096551": "https://www.snpedia.com/index.php/Rs75096551"
}
},
{
"RSIDsList": [
76713772
],
"Chromosome": 7,
"Position": 117587738,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000317",
"i5011301",
"i6056292"
],
"2": [
"VG07S45090"
],
"3": [
"VG07S45090"
]
},
"References": {
"SNPedia.com - rs76713772": "https://www.snpedia.com/index.php/Rs76713772"
}
},
{
"RSIDsList": [
121909011
],
"Chromosome": 7,
"Position": 117540230,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000296",
"i5006070",
"i5011077"
]
},
"References": {
"SNPedia.com - rs121909011": "https://www.snpedia.com/index.php/Rs121909011"
}
},
{
"RSIDsList": [
75961395
],
"Chromosome": 7,
"Position": 117509123,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000294"
],
"2": [
"VG07S29458"
],
"3": [
"VG07S29458"
]
},
"References": {
"SNPedia.com - rs75961395": "https://www.snpedia.com/index.php/Rs75961395"
}
},
{
"RSIDsList": [
78655421
],
"Chromosome": 7,
"Position": 117530975,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i5010839",
"i5006049",
"i4000295",
"i5010838",
"i5010837"
],
"2": [
"VG07S29628"
],
"3": [
"VG07S29628"
]
},
"References": {
"SNPedia.com - rs78655421": "https://www.snpedia.com/index.php/Rs78655421"
}
},
{
"RSIDsList": [
75039782
],
"Chromosome": 7,
"Position": 117639961,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i5011981",
"i4000325"
],
"2": [
"VG07S52449"
],
"3": [
"VG07S52449"
]
},
"References": {
"SNPedia.com - rs75039782": "https://www.snpedia.com/index.php/Rs75039782"
}
},
{
"RSIDsList": [
80224560
],
"Chromosome": 7,
"Position": 117602868,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000320",
"i5011620"
]
},
"References": {
"SNPedia.com - i4000320": "https://www.snpedia.com/index.php/I4000320",
"SNPedia.com - rs80224560": "https://www.snpedia.com/index.php/Rs80224560"
}
},
{
"RSIDsList": [
77188391
],
"Chromosome": 7,
"Position": 117534366,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000315",
"i5010951"
],
"2": [
"VG07S44986"
],
"3": [
"VG07S44986"
]
},
"References": {
"SNPedia.com - rs77188391": "https://www.snpedia.com/index.php/Rs77188391"
}
},
{
"RSIDsList": [
74597325
],
"Chromosome": 7,
"Position": 117587811,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000306",
"i5006055",
"i5011335",
"i6056294"
],
"2": [
"VG07S29297"
],
"3": [
"VG07S29297"
]
},
"References": {
"SNPedia.com - rs74597325": "https://www.snpedia.com/index.php/Rs74597325"
}
},
{
"RSIDsList": [
121908747
],
"Chromosome": 7,
"Position": 117627581,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000322"
]
},
"References": {
"SNPedia.com - rs121908747": "https://www.snpedia.com/index.php/Rs121908747"
}
},
{
"RSIDsList": [
113993960,
199826652
],
"Chromosome": 7,
"Position": 117559592,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i3000001",
"i5011261"
]
},
"References": {
"SNPedia.com - rs113993960": "https://www.snpedia.com/index.php/Rs113993960"
}
},
{
"RSIDsList": [
77932196
],
"Chromosome": 7,
"Position": 117540270,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000297",
"i5011094",
"i5011095"
]
},
"References": {
"SNPedia.com - rs77932196": "https://www.snpedia.com/index.php/Rs77932196"
}
},
{
"RSIDsList": [
121908748
],
"Chromosome": 7,
"Position": 117590440,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000318",
"i5006139",
"i5011416",
"i5011417",
"i5011418"
]
},
"References": {
"SNPedia.com - rs121908748": "https://www.snpedia.com/index.php/Rs121908748"
}
},
{
"RSIDsList": [
113993959
],
"Chromosome": 7,
"Position": 117587778,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000300",
"i5006109",
"i5011314"
]
},
"References": {
"SNPedia.com - rs113993959": "https://www.snpedia.com/index.php/Rs113993959"
}
},
{
"RSIDsList": [
74767530
],
"Chromosome": 7,
"Position": 117627537,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i5011932",
"i4000308",
"i6056298"
],
"2": [
"VG07S29424"
],
"3": [
"VG07S29424"
]
},
"References": {
"SNPedia.com - rs74767530": "https://www.snpedia.com/index.php/Rs74767530"
}
},
{
"RSIDsList": [
77010898
],
"Chromosome": 7,
"Position": 117642566,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000309",
"i5012037",
"i6056299"
],
"2": [
"VG07S29451"
],
"3": [
"VG07S29451"
]
},
"References": {
"SNPedia.com - rs77010898": "https://www.snpedia.com/index.php/Rs77010898"
}
},
{
"RSIDsList": [
121908746
],
"Chromosome": 7,
"Position": 117592219,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {},
"References": {
"SNPedia.com - rs121908746": "https://www.snpedia.com/index.php/Rs121908746"
}
},
{
"RSIDsList": [
75527207
],
"Chromosome": 7,
"Position": 117587806,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000305",
"i5006054",
"i5011331"
],
"2": [
"VG07S29293"
],
"3": [
"VG07S29293"
]
},
"References": {
"SNPedia.com - rs75527207": "https://www.snpedia.com/index.php/Rs75527207"
}
},
{
"RSIDsList": [
78756941
],
"Chromosome": 7,
"Position": 117531115,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000314",
"i5010909",
"i6056291"
],
"2": [
"VG07S44961"
],
"3": [
"VG07S44961"
]
},
"References": {
"SNPedia.com - rs78756941": "https://www.snpedia.com/index.php/Rs78756941"
}
},
{
"RSIDsList": [
80055610
],
"Chromosome": 7,
"Position": 117587833,
"GeneNamesList": [
"CFTR"
],
"CompanyAliases": {
"1": [
"i4000307",
"i5011358",
"i5011359"
]
},
"References": {
"SNPedia.com - rs80055610": "https://www.snpedia.com/index.php/Rs80055610"
}
},
{
"RSIDsList": [
6944702
],
"Chromosome": 7,
"Position": 83653553,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6462562
],
"Chromosome": 7,
"Position": 4088555,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2854746
],
"Chromosome": 7,
"Position": 45960645,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6462544
],
"Chromosome": 7,
"Position": 4077620,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10237838
],
"Chromosome": 7,
"Position": 4073998,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12155314
],
"Chromosome": 7,
"Position": 4081194,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10266101
],
"Chromosome": 7,
"Position": 4073819,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2108166
],
"Chromosome": 7,
"Position": 42125871,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10485860
],
"Chromosome": 7,
"Position": 4090283,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
9692219
],
"Chromosome": 7,
"Position": 4043701,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7803030
],
"Chromosome": 7,
"Position": 4038558,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2342494
],
"Chromosome": 7,
"Position": 4032591,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6950754
],
"Chromosome": 7,
"Position": 4037491,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7807181
],
"Chromosome": 7,
"Position": 4046812,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10278187,
57744561
],
"Chromosome": 7,
"Position": 4034741,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1562005,
58720272
],
"Chromosome": 7,
"Position": 4044191,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7781059,
57876852,
10351382
],
"Chromosome": 7,
"Position": 4046687,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10237319
],
"Chromosome": 7,
"Position": 4033969,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10237488,
59841339
],
"Chromosome": 7,
"Position": 4034710,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10234405,
58770991
],
"Chromosome": 7,
"Position": 4034827,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1562006,
59417113
],
"Chromosome": 7,
"Position": 4043872,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7779616,
56955120,
17293919,
10377747
],
"Chromosome": 7,
"Position": 4046408,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
10265937
],
"Chromosome": 7,
"Position": 4034017,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
7799331,
58503650,
10365277
],
"Chromosome": 7,
"Position": 4046491,
"GeneNamesList": [
"SDK1"
],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,36 +0,0 @@
[
{
"RSIDsList": [
147068120
],
"Chromosome": 8,
"Position": 81350433,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12543326
],
"Chromosome": 8,
"Position": 42003663,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6997494
],
"Chromosome": 8,
"Position": 12833488,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,108 +0,0 @@
[
{
"RSIDsList": [
12552712
],
"Chromosome": 9,
"Position": 27366436,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
6478394
],
"Chromosome": 9,
"Position": 121836674,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1158810
],
"Chromosome": 9,
"Position": 121809519,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
13297008
],
"Chromosome": 9,
"Position": 12677471,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2762462
],
"Chromosome": 9,
"Position": 12699776,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1408799
],
"Chromosome": 9,
"Position": 12672097,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
1325127
],
"Chromosome": 9,
"Position": 12668328,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
2733832
],
"Chromosome": 9,
"Position": 12704725,
"GeneNamesList": [
"MISSING"
],
"CompanyAliases": {},
"References": {}
},
{
"RSIDsList": [
12335410
],
"Chromosome": 9,
"Position": 129238777,
"GeneNamesList": [],
"CompanyAliases": {},
"References": {}
}
]

View file

@ -1,5 +1,5 @@
// locusMetadata provides information about gene locations.
// locusMetadata provides information about locations in the human genome.
package locusMetadata
@ -9,72 +9,13 @@ import "seekia/internal/helpers"
import _ "embed"
import "encoding/json"
import "encoding/gob"
import "errors"
import "bytes"
//go:embed LocusMetadata_Chromosome1.json
var LocusMetadataFile_Chromosome1 []byte
//go:embed LocusMetadata_Chromosome2.json
var LocusMetadataFile_Chromosome2 []byte
//go:embed LocusMetadata_Chromosome3.json
var LocusMetadataFile_Chromosome3 []byte
//go:embed LocusMetadata_Chromosome4.json
var LocusMetadataFile_Chromosome4 []byte
//go:embed LocusMetadata_Chromosome5.json
var LocusMetadataFile_Chromosome5 []byte
//go:embed LocusMetadata_Chromosome6.json
var LocusMetadataFile_Chromosome6 []byte
//go:embed LocusMetadata_Chromosome7.json
var LocusMetadataFile_Chromosome7 []byte
//go:embed LocusMetadata_Chromosome8.json
var LocusMetadataFile_Chromosome8 []byte
//go:embed LocusMetadata_Chromosome9.json
var LocusMetadataFile_Chromosome9 []byte
//go:embed LocusMetadata_Chromosome10.json
var LocusMetadataFile_Chromosome10 []byte
//go:embed LocusMetadata_Chromosome11.json
var LocusMetadataFile_Chromosome11 []byte
//go:embed LocusMetadata_Chromosome12.json
var LocusMetadataFile_Chromosome12 []byte
//go:embed LocusMetadata_Chromosome13.json
var LocusMetadataFile_Chromosome13 []byte
//go:embed LocusMetadata_Chromosome14.json
var LocusMetadataFile_Chromosome14 []byte
//go:embed LocusMetadata_Chromosome15.json
var LocusMetadataFile_Chromosome15 []byte
//go:embed LocusMetadata_Chromosome16.json
var LocusMetadataFile_Chromosome16 []byte
//go:embed LocusMetadata_Chromosome17.json
var LocusMetadataFile_Chromosome17 []byte
//go:embed LocusMetadata_Chromosome19.json
var LocusMetadataFile_Chromosome19 []byte
//go:embed LocusMetadata_Chromosome20.json
var LocusMetadataFile_Chromosome20 []byte
//go:embed LocusMetadata_Chromosome21.json
var LocusMetadataFile_Chromosome21 []byte
//go:embed LocusMetadata_Chromosome22.json
var LocusMetadataFile_Chromosome22 []byte
//go:embed LocusMetadata.gob
var LocusMetadataFile []byte
type LocusMetadata struct{
@ -93,10 +34,16 @@ type LocusMetadata struct{
// This is a number describing its location on the chromosome it exists on.
Position int
// This is true if we know whether this locus belongs to a gene, and which gene it is if one exists
GeneInfoIsKnown bool
// This is true if the locus exists within a gene
// Some loci are non-coding, meaning they do not exist within a gene and do not code for a protein
GeneExists bool
// A list of gene names which refer to the gene which this locus belongs to.
// Each gene name refers to the same gene.
// Will be a list containing "MISSING" if the gene name has not been added yet
// Will be an empty list if no gene exists
// Will be a nil list if gene info is not known, or no gene exists
GeneNamesList []string
// A list of alternate names for the rsid used by companies
@ -115,7 +62,7 @@ const TwentyThreeAndMe GeneticsCompany = 1
const FamilyTreeDNA GeneticsCompany = 2
const MyHeritage GeneticsCompany = 3
// Map Structure: RSID -> LocusMetadata object
// Map Structure: RSID -> Locus Metadata Object
var lociMetadataMap map[int64]LocusMetadata
// This map stores a list of aliases for rsids which have aliases
@ -146,32 +93,32 @@ func InitializeLocusMetadataVariables()error{
rsidsList := locusObject.RSIDsList
for _, rsid := range rsidsList{
for _, rsID := range rsidsList{
_, exists := lociMetadataMap[rsid]
_, exists := lociMetadataMap[rsID]
if (exists == true){
return errors.New("lociMetadataMap contains duplicate rsid.")
return errors.New("lociMetadataMap contains duplicate rsID.")
}
lociMetadataMap[rsid] = locusObject
lociMetadataMap[rsID] = locusObject
}
if (len(rsidsList) > 1){
// We add rsid aliases to map
for _, rsid := range rsidsList{
for _, rsID := range rsidsList{
rsidAliasesList := make([]int64, 0)
for _, rsidInner := range rsidsList{
if (rsid != rsidInner){
if (rsID != rsidInner){
rsidAliasesList = append(rsidAliasesList, rsidInner)
}
}
rsidAliasesMap[rsid] = rsidAliasesList
rsidAliasesMap[rsID] = rsidAliasesList
}
}
@ -288,124 +235,17 @@ func GetCompanyAliasRSID(companyName string, locusAlias string)(bool, int64, err
return false, 0, errors.New("GetCompanyAliasRSID called with invalid companyName: " + companyName)
}
// This function is only public for use in testing
func GetLocusMetadataObjectsList()([]LocusMetadata, error){
chromosomesList := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22}
buffer := bytes.NewBuffer(LocusMetadataFile)
locusMetadataObjectsList := make([]LocusMetadata, 0, len(chromosomesList))
for _, chromosomesInt := range chromosomesList{
chromosomeLocusMetadataObjectsList, err := GetLocusMetadataObjectsListByChromosome(chromosomesInt)
if (err != nil){ return nil, err }
locusMetadataObjectsList = append(locusMetadataObjectsList, chromosomeLocusMetadataObjectsList...)
}
return locusMetadataObjectsList, nil
}
func GetLocusMetadataObjectsListByChromosome(chromosome int)([]LocusMetadata, error){
if (chromosome < 1 || chromosome > 22){
chromosomeString := helpers.ConvertIntToString(chromosome)
return nil, errors.New("GetLocusMetadataObjectsListByChromosome called with invalid chromosome: " + chromosomeString)
}
// Outputs:
// -bool: File exists
// -[]byte: File bytes
getFileBytes := func()(bool, []byte){
switch chromosome{
case 1:{
return true, LocusMetadataFile_Chromosome1
}
case 2:{
return true, LocusMetadataFile_Chromosome2
}
case 3:{
return true, LocusMetadataFile_Chromosome3
}
case 4:{
return true, LocusMetadataFile_Chromosome4
}
case 5:{
return true, LocusMetadataFile_Chromosome5
}
case 6:{
return true, LocusMetadataFile_Chromosome6
}
case 7:{
return true, LocusMetadataFile_Chromosome7
}
case 8:{
return true, LocusMetadataFile_Chromosome8
}
case 9:{
return true, LocusMetadataFile_Chromosome9
}
case 10:{
return true, LocusMetadataFile_Chromosome10
}
case 11:{
return true, LocusMetadataFile_Chromosome11
}
case 12:{
return true, LocusMetadataFile_Chromosome12
}
case 13:{
return true, LocusMetadataFile_Chromosome13
}
case 14:{
return true, LocusMetadataFile_Chromosome14
}
case 15:{
return true, LocusMetadataFile_Chromosome15
}
case 16:{
return true, LocusMetadataFile_Chromosome16
}
case 17:{
return true, LocusMetadataFile_Chromosome17
}
//case 18:{
// return true, LocusMetadataFile_Chromosome18
//}
case 19:{
return true, LocusMetadataFile_Chromosome19
}
case 20:{
return true, LocusMetadataFile_Chromosome20
}
case 21:{
return true, LocusMetadataFile_Chromosome21
}
case 22:{
return true, LocusMetadataFile_Chromosome22
}
}
return false, nil
}
fileExists, fileBytes := getFileBytes()
if (fileExists == false){
// No loci exist for this chromosome
emptyList := make([]LocusMetadata, 0)
return emptyList, nil
}
decoder := gob.NewDecoder(buffer)
var locusMetadataObjectsList []LocusMetadata
err := json.Unmarshal(fileBytes, &locusMetadataObjectsList)
if (err != nil) { return nil, err }
err := decoder.Decode(&locusMetadataObjectsList)
if (err != nil){ return nil, err }
return locusMetadataObjectsList, nil
}

View file

@ -0,0 +1,247 @@
// modifyLocusMetadata provides functions to modify the locus metadata file
// This is a gob encoded file which contains information about genome loci
// Examples of this information are chromosome, position, and which gene the locus belongs to.
// We have to use golang to edit this file, we can't edit it manually.
// To run these functions, see:
// /utilities/addLocusMetadata/addLocusMetadata.go
// /utilities/importLocusMetadata/importLocusMetadata.go
package modifyLocusMetadata
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/internal/helpers"
import "encoding/gob"
import "bytes"
import "reflect"
import "errors"
// AddLocusMetadata merges the provided loci into the existing locus metadata and
// returns the gob-encoded bytes of the resulting locus metadata file.
//
// For each input locus:
//	-If none of its rsIDs already exist in the locus metadata, it is added as-is.
//	-Otherwise, it is merged with the first existing locus found for one of its rsIDs.
//	 The chromosome, position, and gene existence must agree, otherwise an error is returned.
//	 The rsIDs, gene names, references, and company aliases are combined.
//	 The merged locus replaces the existing locus in the output.
//
// An error is returned if two input loci share an rsID.
//
//Outputs:
//	-int: Quantity of added loci (this also includes loci which already existed but had new info to merge)
//	-[]byte: File bytes of the new locus metadata file
//	-error
func AddLocusMetadata(inputLociToAddList []locusMetadata.LocusMetadata)(int, []byte, error){

	err := locusMetadata.InitializeLocusMetadataVariables()
	if (err != nil){ return 0, nil, err }

	// These are the loci which will be appended to the output (new loci and merged loci)
	lociToAddList := make([]locusMetadata.LocusMetadata, 0)

	// These are the existing loci which are being replaced by a merged locus
	lociToDeleteList := make([]locusMetadata.LocusMetadata, 0)

	// We use this map to make sure that each LocusMetadata object to add has unique rsIDs
	newLocusMetadataRSIDsMap := make(map[int64]struct{})

	for _, newLocusMetadataObject := range inputLociToAddList{

		newLocusRSIDsList := newLocusMetadataObject.RSIDsList

		for _, rsID := range newLocusRSIDsList{

			_, exists := newLocusMetadataRSIDsMap[rsID]
			if (exists == true){
				return 0, nil, errors.New("inputLociToAddList contains multiple locus metadatas with a duplicate rsID.")
			}

			newLocusMetadataRSIDsMap[rsID] = struct{}{}
		}

		// First we check to see if locus metadata already exists
		// Outputs:
		//	-bool: Locus metadata already exists for this locus
		//	-locusMetadata.LocusMetadata
		//	-error
		getExistingLocusMetadata := func()(bool, locusMetadata.LocusMetadata, error){

			for _, rsID := range newLocusRSIDsList{

				exists, existingLocusMetadata, err := locusMetadata.GetLocusMetadata(rsID)
				if (err != nil){ return false, locusMetadata.LocusMetadata{}, err }
				if (exists == true){
					return true, existingLocusMetadata, nil
				}
			}

			return false, locusMetadata.LocusMetadata{}, nil
		}

		locusMetadataExists, existingLocusMetadata, err := getExistingLocusMetadata()
		if (err != nil) { return 0, nil, err }
		if (locusMetadataExists == false){
			lociToAddList = append(lociToAddList, newLocusMetadataObject)
			continue
		}

		// We check to see if the existing locus metadata contains identical chromosome/position

		newChromosome := newLocusMetadataObject.Chromosome
		existingChromosome := existingLocusMetadata.Chromosome

		if (existingChromosome != newChromosome){
			// New locus metadata conflicts with existing locus metadata
			return 0, nil, errors.New("Trying to add a locus metadata with a conflicting chromosome.")
		}

		newPosition := newLocusMetadataObject.Position
		existingPosition := existingLocusMetadata.Position

		if (existingPosition != newPosition){
			// New locus metadata conflicts with existing locus metadata
			return 0, nil, errors.New("Trying to add a locus metadata with a conflicting position.")
		}

		newRSIDsList := newLocusMetadataObject.RSIDsList
		existingRSIDsList := existingLocusMetadata.RSIDsList

		combinedRSIDsList := helpers.CombineTwoListsAndAvoidDuplicates(newRSIDsList, existingRSIDsList)

		newLocusMetadataObject.RSIDsList = combinedRSIDsList

		newGeneInfoIsKnown := newLocusMetadataObject.GeneInfoIsKnown
		existingGeneInfoIsKnown := existingLocusMetadata.GeneInfoIsKnown

		if (newGeneInfoIsKnown == false && existingGeneInfoIsKnown == true){

			// We add existing gene info to new locus metadata object

			existingGeneExists := existingLocusMetadata.GeneExists
			existingGeneNamesList := existingLocusMetadata.GeneNamesList

			newLocusMetadataObject.GeneInfoIsKnown = true
			newLocusMetadataObject.GeneExists = existingGeneExists
			newLocusMetadataObject.GeneNamesList = existingGeneNamesList

		} else if (newGeneInfoIsKnown == true && existingGeneInfoIsKnown == true){

			// We check for conflicts

			existingGeneExists := existingLocusMetadata.GeneExists
			newGeneExists := newLocusMetadataObject.GeneExists

			if (existingGeneExists != newGeneExists){
				// New locus metadata conflicts with existing locus metadata
				return 0, nil, errors.New("Trying to add a locus metadata with a conflicting GeneExists.")
			}

			if (existingGeneExists == true){

				existingGeneNamesList := existingLocusMetadata.GeneNamesList
				newGeneNamesList := newLocusMetadataObject.GeneNamesList

				if (existingGeneNamesList == nil){
					return 0, nil, errors.New("Locus Metadata contains item with known gene name(s) but with a nil GeneNamesList")
				}
				if (newGeneNamesList == nil){
					return 0, nil, errors.New("New locus metadata item to add contains known gene name(s) but with a nil GeneNamesList")
				}

				combinedGeneNamesList := helpers.CombineTwoListsAndAvoidDuplicates(existingGeneNamesList, newGeneNamesList)

				newLocusMetadataObject.GeneNamesList = combinedGeneNamesList
			}
		}

		// If only the new locus has gene info, we keep the new locus gene info unchanged

		existingReferencesMap := existingLocusMetadata.References
		newReferencesMap := newLocusMetadataObject.References

		if (newReferencesMap == nil){

			// Writing to a nil map panics.
			// The new locus has no references of its own, so there is nothing to merge or conflict with.
			// We adopt the existing references.
			newReferencesMap = existingReferencesMap

		} else {

			// We merge the references maps

			for key, existingValue := range existingReferencesMap{

				newValue, exists := newReferencesMap[key]
				if (exists == false){
					newReferencesMap[key] = existingValue
					continue
				}
				if (existingValue != newValue){
					return 0, nil, errors.New("Existing locus metadata references map contains different value for same key")
				}
			}
		}

		newLocusMetadataObject.References = newReferencesMap

		newCompanyAliasesMap := newLocusMetadataObject.CompanyAliases
		existingCompanyAliasesMap := existingLocusMetadata.CompanyAliases

		if (newCompanyAliasesMap == nil){

			// Writing to a nil map panics.
			// The new locus has no company aliases of its own, so we adopt the existing aliases.
			newCompanyAliasesMap = existingCompanyAliasesMap

		} else {

			// We merge the company aliases maps

			for key, existingValue := range existingCompanyAliasesMap{

				newValue, exists := newCompanyAliasesMap[key]
				if (exists == false){
					newCompanyAliasesMap[key] = existingValue
					continue
				}

				// We combine the company alias lists

				combinedCompanyAliasesList := helpers.CombineTwoListsAndAvoidDuplicates(existingValue, newValue)

				newCompanyAliasesMap[key] = combinedCompanyAliasesList
			}
		}

		newLocusMetadataObject.CompanyAliases = newCompanyAliasesMap

		// The merged locus replaces the existing locus
		lociToAddList = append(lociToAddList, newLocusMetadataObject)
		lociToDeleteList = append(lociToDeleteList, existingLocusMetadata)
	}

	existingLocusMetadataObjectsList, err := locusMetadata.GetLocusMetadataObjectsList()
	if (err != nil) { return 0, nil, err }

	newLocusMetadataObjectsList := make([]locusMetadata.LocusMetadata, 0, len(existingLocusMetadataObjectsList) + len(lociToAddList))

	for _, locusMetadataObject := range existingLocusMetadataObjectsList{

		// We check to see if we should delete this item

		checkIfLocusIsDeleted := func()bool{

			for _, locusToDelete := range lociToDeleteList{

				areEqual := reflect.DeepEqual(locusToDelete, locusMetadataObject)
				if (areEqual == true){
					return true
				}
			}
			return false
		}

		locusIsDeleted := checkIfLocusIsDeleted()
		if (locusIsDeleted == false){
			newLocusMetadataObjectsList = append(newLocusMetadataObjectsList, locusMetadataObject)
		}
	}

	newLocusMetadataObjectsList = append(newLocusMetadataObjectsList, lociToAddList...)

	quantityOfAddedLoci := len(lociToAddList)

	buffer := new(bytes.Buffer)

	encoder := gob.NewEncoder(buffer)

	err = encoder.Encode(newLocusMetadataObjectsList)
	if (err != nil) { return 0, nil, err }

	newLocusMetadataFileBytes := buffer.Bytes()

	return quantityOfAddedLoci, newLocusMetadataFileBytes, nil
}
// PruneLocusMetadata is intended to remove unused loci from the locus metadata file.
// It is not implemented yet.
//
// Until it is implemented, it always returns an error instead of (nil, nil),
// so that a caller cannot mistake the nil bytes for a valid pruned file.
//
//Outputs:
//	-[]byte: File bytes of the pruned locus metadata file (always nil for now)
//	-error
func PruneLocusMetadata()([]byte, error){

	//TODO: Implement this function
	// This function will build a list of every rsID used in every trait and disease, and
	// prune the locus metadata files of loci which do not exist in that list

	return nil, errors.New("PruneLocusMetadata is not implemented yet.")
}

View file

@ -0,0 +1,67 @@
package traits
import "seekia/internal/helpers"
import "maps"
import _ "embed"
import "encoding/gob"
import "bytes"
//go:embed rsIDs/GiantHeightStudyLoci.gob
var GiantHeightStudyLociFile []byte
// getHeightTraitObject builds the Trait object for the Height trait.
// The trait's loci are decoded from the embedded GiantHeightStudyLociFile, which is a gob-encoded []int64 of rsIDs.
// An error is returned if the embedded file cannot be decoded.
func getHeightTraitObject()(Trait, error){

	// The height SNPs are taken from the meta-analyses of Genome-Wide Association Studies (GWAS) created by the GIANT consortium
	//  https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files
	// Download link:
	//  https://portals.broadinstitute.org/collaboration/giant/images/4/4e/GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_ALL.gz
	// SHA-256 Checksum:
	//  db18859724675f2f9ba86eff28cb4dacac0629c0b25c9806a6cf2eed6bb8b71e
	// See /utilities/extractGiantLoci/extractGiantLoci.go to see how they were extracted from the file

	var giantStudyLociList []int64

	err := gob.NewDecoder(bytes.NewBuffer(GiantHeightStudyLociFile)).Decode(&giantStudyLociList)
	if (err != nil){
		return Trait{}, err
	}

	// These are the references for every locus which came from the GIANT study
	giantStudyReferencesMap := map[string]string{
		"GIANT consortium - Meta-analyses of Genome-Wide Association Studies - 2022 - Height": "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files",
	}

	// Map Structure: rsID -> References Map
	locusReferencesMap := make(map[int64]map[string]string)

	for _, rsID := range giantStudyLociList{

		// Each locus receives its own copy of the references map
		locusReferencesMap[rsID] = maps.Clone(giantStudyReferencesMap)
	}

	// These are the references for the trait as a whole
	traitReferencesMap := map[string]string{
		"GIANT consortium - Meta-analyses of Genome-Wide Association Studies - 2022 - Height": "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files",
	}

	return Trait{
		TraitName: "Height",
		TraitDescription: "The distance between the top of a standing person head and the floor.",
		DiscreteOrNumeric: "Numeric",
		LocusReferencesMap: locusReferencesMap,
		LociList: helpers.GetListOfMapKeys(locusReferencesMap),
		LociList_Rules: []int64{},
		RulesList: []TraitRule{},
		OutcomesList: []string{},
		ReferencesMap: traitReferencesMap,
	}, nil
}

View file

@ -0,0 +1,2 @@
### This folder contains files which are gob-encoded []int64
### These int64s are rsIDs which influence various traits.

View file

@ -91,7 +91,7 @@ var traitObjectsList []Trait
// Map Structure: Rule locus identifier -> RSID representing this locus
var locusRSIDsMap map[string]int64
func InitializeTraitVariables(){
func InitializeTraitVariables()error{
lactoseToleranceObject := getLactoseToleranceTraitObject()
hairTextureObject := getHairTextureTraitObject()
@ -100,7 +100,10 @@ func InitializeTraitVariables(){
hairColorObject := getHairColorTraitObject()
skinColorObject := getSkinColorTraitObject()
traitObjectsList = []Trait{lactoseToleranceObject, hairTextureObject, facialStructureObject, eyeColorObject, hairColorObject, skinColorObject}
heightObject, err := getHeightTraitObject()
if (err != nil){ return err }
traitObjectsList = []Trait{lactoseToleranceObject, hairTextureObject, facialStructureObject, eyeColorObject, hairColorObject, skinColorObject, heightObject}
traitNamesList = make([]string, 0, len(traitObjectsList))
locusRSIDsMap = make(map[string]int64)
@ -126,6 +129,8 @@ func InitializeTraitVariables(){
}
}
}
return nil
}
// Be aware that all of these functions are returning original objects/slices, not copies

1
utilities/addLocusMetadata/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
NewLocusMetadata.gob

View file

@ -1,17 +1,15 @@
// addLocusMetadata.go provides a function to manually add locus metadata to the .json files.
// addLocusMetadata.go provides a function to add locus metadata to the .gob locus metadata file.
// This utility creates a NewLocusMetadata.gob file, which must be renamed to LocusMetadata.gob and placed in the /resources/geneticReferences/locusMetadata folder
package main
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/resources/geneticReferences/modifyLocusMetadata"
import "seekia/internal/helpers"
import "seekia/internal/localFilesystem"
import "encoding/json"
import "slices"
import "log"
func main(){
@ -25,6 +23,8 @@ func main(){
RSIDsList: []int64{5957354},
Chromosome: X,
Position: 120305480,
GeneInfoIsKnown: true,
GeneExists: true,
GeneNamesList: []string{"TMEM255A"},
CompanyAliases: make(map[locusMetadata.GeneticsCompany][]string),
References: make(map[string]string),
@ -34,6 +34,8 @@ func main(){
RSIDsList: []int64{78542430},
Chromosome: X,
Position: 48259397,
GeneInfoIsKnown: true,
GeneExists: true,
GeneNamesList: []string{"SSX1"},
CompanyAliases: make(map[locusMetadata.GeneticsCompany][]string),
References: make(map[string]string),
@ -47,78 +49,33 @@ func main(){
//newLocusMetadataObject_2,
}
numberOfLociToAdd := len(lociToAddList)
quantityOfLociToAdd := len(lociToAddList)
err := locusMetadata.InitializeLocusMetadataVariables()
quantityOfAddedLoci, newLocusMetadataFileBytes, err := modifyLocusMetadata.AddLocusMetadata(lociToAddList)
if (err != nil){
log.Println("ERROR: " + err.Error())
log.Println("Error: " + err.Error())
return
}
// Map Structure: Chromosome -> List of locus metadata objects to add
lociToAddMap := make(map[int][]locusMetadata.LocusMetadata)
for _, locusObject := range lociToAddList{
// First we check to see if locus metadata already exists
locusRSIDsList := locusObject.RSIDsList
for _, rsID := range locusRSIDsList{
exists, _, err := locusMetadata.GetLocusMetadata(rsID)
if (err != nil){
log.Println("ERROR: " + err.Error())
return
}
if (exists == true){
rsIDString := helpers.ConvertInt64ToString(rsID)
log.Println("lociToAddList contains locus whose metadata already exists: " + rsIDString)
return
}
}
locusChromosome := locusObject.Chromosome
existingList, exists := lociToAddMap[locusChromosome]
if (exists == false){
lociToAddMap[locusChromosome] = []locusMetadata.LocusMetadata{locusObject}
} else {
existingList = append(existingList, locusObject)
lociToAddMap[locusChromosome] = existingList
}
err = localFilesystem.CreateOrOverwriteFile(newLocusMetadataFileBytes, "./", "NewLocusMetadata.gob")
if (err != nil){
log.Println("Error: " + err.Error())
return
}
for chromosomeInt, locusMetadataObjectsToAddList := range lociToAddMap{
quantityOfAddedLociString := helpers.ConvertIntToString(quantityOfAddedLoci)
existingLocusMetadataObjectsList, err := locusMetadata.GetLocusMetadataObjectsListByChromosome(chromosomeInt)
if (err != nil) {
log.Println(err)
return
}
log.Println("Successfully added " + quantityOfAddedLociString + " locus metadatas!")
log.Println("The new locus metadatas have been saved to the NewLocusMetadata.gob file.")
newLocusMetadataObjectsList := slices.Concat(existingLocusMetadataObjectsList, locusMetadataObjectsToAddList)
quantityOfSkippedLoci := quantityOfLociToAdd - quantityOfAddedLoci
newChromosomeFileBytes, err := json.MarshalIndent(newLocusMetadataObjectsList, "", "\t")
if (err != nil){
log.Println("ERROR: " + err.Error())
return
}
if (quantityOfSkippedLoci != 0){
currentChromosomeString := helpers.ConvertIntToString(chromosomeInt)
quantityOfSkippedLociString := helpers.ConvertIntToString(quantityOfSkippedLoci)
locusMetadataFilepath := "../../resources/geneticReferences/locusMetadata/"
err = localFilesystem.CreateOrOverwriteFile(newChromosomeFileBytes, locusMetadataFilepath, "LocusMetadata_Chromosome" + currentChromosomeString + ".json")
if (err != nil){
log.Println("ERROR: " + err.Error())
return
}
log.Println("Skipped " + quantityOfSkippedLociString + " locus metadatas because they already existed.")
}
numberOfAddedLociString := helpers.ConvertIntToString(numberOfLociToAdd)
log.Println("Successfully added " + numberOfAddedLociString + " locus metadatas!")
}

View file

@ -45,7 +45,11 @@ import "time"
func main(){
traits.InitializeTraitVariables()
err := traits.InitializeTraitVariables()
if (err != nil){
panic(err)
return
}
app := app.New()
@ -1209,7 +1213,21 @@ func setStartAndMonitorTrainModelPage(window fyne.Window, traitName string, prev
return false, true, trainingDataObject, nil
}
processCompleted, err := geneticPrediction.TrainNeuralNetwork(traitName, neuralNetworkObject, getNextTrainingDataFunction)
traitObject, err := traits.GetTraitObject(traitName)
if (err != nil) { return false, err }
getTraitIsNumericBool := func()bool{
traitIsDiscreteOrNumeric := traitObject.DiscreteOrNumeric
if (traitIsDiscreteOrNumeric == "Numeric"){
return true
}
return false
}
traitIsNumeric := getTraitIsNumericBool()
processCompleted, err := geneticPrediction.TrainNeuralNetwork(traitName, traitIsNumeric, neuralNetworkObject, getNextTrainingDataFunction)
if (err != nil) { return false, err }
if (processCompleted == false){
return false, nil
@ -1314,7 +1332,6 @@ func setTestModelsPage(window fyne.Window, previousPage func()){
window.SetContent(page)
}
func setStartAndMonitorTestModelPage(window fyne.Window, traitName string, previousPage func()){
title := getBoldLabelCentered("Testing Model")
@ -1354,269 +1371,280 @@ func setStartAndMonitorTestModelPage(window fyne.Window, traitName string, previ
testModelFunction := func(){
//Outputs:
// -bool: Process completed (true == was not stopped mid-way)
// -geneticPrediction.TraitPredictionAccuracyInfoMap
// -error
testModel := func()(bool, geneticPrediction.TraitPredictionAccuracyInfoMap, error){
type TraitAccuracyStatisticsValue struct{
// This stores the quantity of examples of this outcome
QuantityOfExamples int
// This stores the quantity of predictions that were made for this outcome
// In other words: The quantity of instances where our model predicted this outcome
QuantityOfPredictions int
// This stores the quantity of predictions that were correct when the genome had this outcome
QuantityOfCorrectGenomePredictions int
// This stores the quantity of predictions that were correct when the model predicted this outcome
QuantityOfCorrectOutcomePredictions int
}
// We use this map to count up the information about predictions
// We use information from this map to construct the final accuracy information map
traitPredictionInfoMap := make(map[geneticPrediction.TraitOutcomeInfo]TraitAccuracyStatisticsValue)
_, testingSetFilepathsList, err := getTrainingAndTestingDataFilepathLists(traitName)
if (err != nil) { return false, nil, err }
traitNameWithoutWhitespaces := strings.ReplaceAll(traitName, " ", "")
// We read the trained model for this trait
modelFilename := traitNameWithoutWhitespaces + "Model.gob"
trainedModelFilepath := goFilepath.Join("./TrainedModels/", modelFilename)
fileExists, fileContents, err := localFilesystem.GetFileContents(trainedModelFilepath)
if (err != nil) { return false, nil, err }
if (fileExists == false){
return false, nil, errors.New("TrainedModel not found: " + trainedModelFilepath)
}
neuralNetworkObject, err := geneticPrediction.DecodeBytesToNeuralNetworkObject(fileContents)
if (err != nil) { return false, nil, err }
numberOfTrainingDatas := len(testingSetFilepathsList)
numberOfTrainingDatasString := helpers.ConvertIntToString(numberOfTrainingDatas)
finalIndex := numberOfTrainingDatas - 1
for index, filePath := range testingSetFilepathsList{
testModelIsStoppedBoolMutex.RLock()
testModelIsStopped := testModelIsStoppedBool
testModelIsStoppedBoolMutex.RUnlock()
if (testModelIsStopped == true){
// User exited the process
return false, nil, nil
}
fileExists, fileContents, err := localFilesystem.GetFileContents(filePath)
if (err != nil) { return false, nil, err }
if (fileExists == false){
return false, nil, errors.New("TrainingData file not found: " + filePath)
}
trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents)
if (err != nil) { return false, nil, err }
trainingDataInputLayer := trainingDataObject.InputLayer
trainingDataExpectedOutputLayer := trainingDataObject.OutputLayer
predictionLayer, err := geneticPrediction.GetNeuralNetworkRawPrediction(&neuralNetworkObject, trainingDataInputLayer)
if (err != nil) { return false, nil, err }
numberOfPredictionNeurons := len(predictionLayer)
if (len(trainingDataExpectedOutputLayer) != numberOfPredictionNeurons){
return false, nil, errors.New("Neural network prediction output length does not match expected output length.")
}
correctOutcomeName, err := geneticPrediction.GetOutcomeNameFromOutputLayer(traitName, true, trainingDataExpectedOutputLayer)
if (err != nil) { return false, nil, err }
predictedOutcomeName, err := geneticPrediction.GetOutcomeNameFromOutputLayer(traitName, true, predictionLayer)
if (err != nil) { return false, nil, err }
getPredictionIsCorrectBool := func()bool{
if (predictedOutcomeName == correctOutcomeName){
return true
}
return false
}
predictionIsCorrect := getPredictionIsCorrectBool()
numberOfKnownLoci, numberOfKnownAndPhasedLoci, numberOfLoci, err := geneticPrediction.GetLociInfoFromInputLayer(trainingDataInputLayer)
if (err != nil) { return false, nil, err }
proportionOfLociTested := float64(numberOfKnownLoci)/float64(numberOfLoci)
percentageOfLociTested := int(100*proportionOfLociTested)
proportionOfPhasedLoci := float64(numberOfKnownAndPhasedLoci)/float64(numberOfKnownLoci)
percentageOfPhasedLoci := int(100*proportionOfPhasedLoci)
{
// We first add the information to the map for the correct outcome
newTraitOutcomeInfo_CorrectOutcome := geneticPrediction.TraitOutcomeInfo{
OutcomeName: correctOutcomeName,
PercentageOfLociTested: percentageOfLociTested,
PercentageOfPhasedLoci: percentageOfPhasedLoci,
}
getTraitAccuracyStatisticsValue_CorrectOutcome := func()TraitAccuracyStatisticsValue{
existingTraitAccuracyStatisticsValue, exists := traitPredictionInfoMap[newTraitOutcomeInfo_CorrectOutcome]
if (exists == false){
newTraitAccuracyStatisticsValue := TraitAccuracyStatisticsValue{}
return newTraitAccuracyStatisticsValue
}
return existingTraitAccuracyStatisticsValue
}
traitAccuracyStatisticsValue := getTraitAccuracyStatisticsValue_CorrectOutcome()
traitAccuracyStatisticsValue.QuantityOfExamples += 1
if (predictionIsCorrect == true){
traitAccuracyStatisticsValue.QuantityOfCorrectGenomePredictions += 1
}
traitPredictionInfoMap[newTraitOutcomeInfo_CorrectOutcome] = traitAccuracyStatisticsValue
}
{
// We now add the information to the map for the predicted outcome
newTraitOutcomeInfo_PredictedOutcome := geneticPrediction.TraitOutcomeInfo{
OutcomeName: predictedOutcomeName,
PercentageOfLociTested: percentageOfLociTested,
PercentageOfPhasedLoci: percentageOfPhasedLoci,
}
getTraitAccuracyStatisticsValue_PredictedOutcome := func()TraitAccuracyStatisticsValue{
existingTraitAccuracyStatisticsValue, exists := traitPredictionInfoMap[newTraitOutcomeInfo_PredictedOutcome]
if (exists == false){
newTraitAccuracyStatisticsValue := TraitAccuracyStatisticsValue{}
return newTraitAccuracyStatisticsValue
}
return existingTraitAccuracyStatisticsValue
}
traitAccuracyStatisticsValue := getTraitAccuracyStatisticsValue_PredictedOutcome()
traitAccuracyStatisticsValue.QuantityOfPredictions += 1
if (predictionIsCorrect == true){
traitAccuracyStatisticsValue.QuantityOfCorrectOutcomePredictions += 1
}
traitPredictionInfoMap[newTraitOutcomeInfo_PredictedOutcome] = traitAccuracyStatisticsValue
}
exampleIndexString := helpers.ConvertIntToString(index+1)
numberOfExamplesProgress := "Tested " + exampleIndexString + "/" + numberOfTrainingDatasString + " Examples"
progressDetailsBinding.Set(numberOfExamplesProgress)
newProgressFloat64 := float64(index)/float64(finalIndex)
progressPercentageBinding.Set(newProgressFloat64)
}
// Now we construct the TraitAccuracyInfoMap
// This map stores the accuracy for each outcome
traitPredictionAccuracyInfoMap := make(map[geneticPrediction.TraitOutcomeInfo]geneticPrediction.TraitPredictionAccuracyInfo)
for traitAccuracyData, value := range traitPredictionInfoMap{
quantityOfExamples := value.QuantityOfExamples
quantityOfPredictions := value.QuantityOfPredictions
quantityOfCorrectGenomePredictions := value.QuantityOfCorrectGenomePredictions
quantityOfCorrectOutcomePredictions := value.QuantityOfCorrectOutcomePredictions
if (quantityOfCorrectGenomePredictions > quantityOfExamples){
return false, nil, errors.New("traitPredictionInfoMap contains quantityOfCorrectGenomePredictions > quantityOfExamples")
}
if (quantityOfCorrectOutcomePredictions > quantityOfPredictions){
return false, nil, errors.New("traitPredictionInfoMap contains quantityOfCorrectOutcomePredictions > quantityOfPredictions")
}
newTraitPredictionAccuracyInfo := geneticPrediction.TraitPredictionAccuracyInfo{
QuantityOfExamples: quantityOfExamples,
QuantityOfPredictions: quantityOfPredictions,
}
if (quantityOfExamples > 0){
proportionOfCorrectGenomePredictions := float64(quantityOfCorrectGenomePredictions)/float64(quantityOfExamples)
percentageOfCorrectGenomePredictions := int(100*proportionOfCorrectGenomePredictions)
newTraitPredictionAccuracyInfo.ProbabilityOfCorrectGenomePrediction = percentageOfCorrectGenomePredictions
}
if (quantityOfPredictions > 0){
proportionOfCorrectOutcomePredictions := float64(quantityOfCorrectOutcomePredictions)/float64(quantityOfPredictions)
percentageOfCorrectOutcomePredictions := int(100*proportionOfCorrectOutcomePredictions)
newTraitPredictionAccuracyInfo.ProbabilityOfCorrectOutcomePrediction = percentageOfCorrectOutcomePredictions
}
traitPredictionAccuracyInfoMap[traitAccuracyData] = newTraitPredictionAccuracyInfo
}
// Testing is complete.
// We save the info map as a file in the ModelAccuracies folder
fileBytes, err := geneticPrediction.EncodeTraitPredictionAccuracyInfoMapToBytes(traitPredictionAccuracyInfoMap)
if (err != nil) { return false, nil, err }
_, err = localFilesystem.CreateFolder("./ModelAccuracies")
if (err != nil) { return false, nil, err }
modelAccuracyFilename := traitNameWithoutWhitespaces + "ModelAccuracy.gob"
err = localFilesystem.CreateOrOverwriteFile(fileBytes, "./ModelAccuracies/", modelAccuracyFilename)
if (err != nil) { return false, nil, err }
progressPercentageBinding.Set(1)
return true, traitPredictionAccuracyInfoMap, nil
}
processIsComplete, traitPredictionAccuracyInfoMap, err := testModel()
traitObject, err := traits.GetTraitObject(traitName)
if (err != nil){
setErrorEncounteredPage(window, err, previousPage)
return
}
if (processIsComplete == false){
// User exited the page
return
}
setViewModelTestingTraitResultsPage(window, traitName, traitPredictionAccuracyInfoMap, previousPage)
traitIsDiscreteOrNumeric := traitObject.DiscreteOrNumeric
if (traitIsDiscreteOrNumeric == "Discrete"){
// testModel runs the trained model for traitName against every file in the testing set,
// tallies how often each outcome was seen and predicted, and saves the resulting accuracy map
// to the ./ModelAccuracies folder.
//Outputs:
//	-bool: Process completed (true == was not stopped mid-way)
//	-geneticPrediction.DiscreteTraitPredictionAccuracyInfoMap
//	-error
testModel := func()(bool, geneticPrediction.DiscreteTraitPredictionAccuracyInfoMap, error){

	type TraitAccuracyStatisticsValue struct{

		// This stores the quantity of examples of this outcome
		QuantityOfExamples int

		// This stores the quantity of predictions that were made for this outcome
		// In other words: The quantity of instances where our model predicted this outcome
		QuantityOfPredictions int

		// This stores the quantity of predictions that were correct when the genome had this outcome
		QuantityOfCorrectGenomePredictions int

		// This stores the quantity of predictions that were correct when the model predicted this outcome
		QuantityOfCorrectOutcomePredictions int
	}

	// We use this map to count up the information about predictions
	// We use information from this map to construct the final accuracy information map
	traitPredictionInfoMap := make(map[geneticPrediction.DiscreteTraitOutcomeInfo]TraitAccuracyStatisticsValue)

	_, testingSetFilepathsList, err := getTrainingAndTestingDataFilepathLists(traitName)
	if (err != nil) { return false, nil, err }

	traitNameWithoutWhitespaces := strings.ReplaceAll(traitName, " ", "")

	// We read the trained model for this trait
	modelFilename := traitNameWithoutWhitespaces + "Model.gob"
	trainedModelFilepath := goFilepath.Join("./TrainedModels/", modelFilename)

	fileExists, fileContents, err := localFilesystem.GetFileContents(trainedModelFilepath)
	if (err != nil) { return false, nil, err }
	if (fileExists == false){
		return false, nil, errors.New("TrainedModel not found: " + trainedModelFilepath)
	}

	neuralNetworkObject, err := geneticPrediction.DecodeBytesToNeuralNetworkObject(fileContents)
	if (err != nil) { return false, nil, err }

	quantityOfTestingExamples := len(testingSetFilepathsList)
	quantityOfTestingExamplesString := helpers.ConvertIntToString(quantityOfTestingExamples)

	finalIndex := quantityOfTestingExamples - 1

	for index, filePath := range testingSetFilepathsList{

		testModelIsStoppedBoolMutex.RLock()
		testModelIsStopped := testModelIsStoppedBool
		testModelIsStoppedBoolMutex.RUnlock()

		if (testModelIsStopped == true){
			// User exited the process
			return false, nil, nil
		}

		fileExists, fileContents, err := localFilesystem.GetFileContents(filePath)
		if (err != nil) { return false, nil, err }
		if (fileExists == false){
			return false, nil, errors.New("TrainingData file not found: " + filePath)
		}

		trainingDataObject, err := geneticPrediction.DecodeBytesToTrainingDataObject(fileContents)
		if (err != nil) { return false, nil, err }

		trainingDataInputLayer := trainingDataObject.InputLayer
		trainingDataExpectedOutputLayer := trainingDataObject.OutputLayer

		predictionLayer, err := geneticPrediction.GetNeuralNetworkRawPrediction(&neuralNetworkObject, false, trainingDataInputLayer)
		if (err != nil) { return false, nil, err }

		if (len(trainingDataExpectedOutputLayer) != len(predictionLayer)){
			return false, nil, errors.New("Neural network prediction output length does not match expected output length.")
		}

		correctOutcomeName, err := geneticPrediction.GetOutcomeNameFromOutputLayer(traitName, true, trainingDataExpectedOutputLayer)
		if (err != nil) { return false, nil, err }

		predictedOutcomeName, err := geneticPrediction.GetOutcomeNameFromOutputLayer(traitName, true, predictionLayer)
		if (err != nil) { return false, nil, err }

		predictionIsCorrect := (predictedOutcomeName == correctOutcomeName)

		numberOfKnownLoci, numberOfKnownAndPhasedLoci, numberOfLoci, err := geneticPrediction.GetLociInfoFromNetworkInputLayer(trainingDataInputLayer)
		if (err != nil) { return false, nil, err }

		// We guard both divisions against a zero denominator
		// 0/0 is NaN, and converting NaN to an int produces an implementation-dependent value,
		// which would end up inside the map keys below

		percentageOfLociTested := 0
		if (numberOfLoci != 0){
			proportionOfLociTested := float64(numberOfKnownLoci)/float64(numberOfLoci)
			percentageOfLociTested = int(100*proportionOfLociTested)
		}

		percentageOfPhasedLoci := 0
		if (numberOfKnownLoci != 0){
			proportionOfPhasedLoci := float64(numberOfKnownAndPhasedLoci)/float64(numberOfKnownLoci)
			percentageOfPhasedLoci = int(100*proportionOfPhasedLoci)
		}

		{
			// We first add the information to the map for the correct outcome

			newTraitOutcomeInfo_CorrectOutcome := geneticPrediction.DiscreteTraitOutcomeInfo{
				OutcomeName: correctOutcomeName,
				PercentageOfLociTested: percentageOfLociTested,
				PercentageOfPhasedLoci: percentageOfPhasedLoci,
			}

			// If no entry exists yet, the map lookup returns a zero-valued statistics object
			traitAccuracyStatisticsValue := traitPredictionInfoMap[newTraitOutcomeInfo_CorrectOutcome]

			traitAccuracyStatisticsValue.QuantityOfExamples += 1

			if (predictionIsCorrect == true){
				traitAccuracyStatisticsValue.QuantityOfCorrectGenomePredictions += 1
			}

			traitPredictionInfoMap[newTraitOutcomeInfo_CorrectOutcome] = traitAccuracyStatisticsValue
		}
		{
			// We now add the information to the map for the predicted outcome
			// This must happen after the block above, because both keys are identical when the prediction is correct

			newTraitOutcomeInfo_PredictedOutcome := geneticPrediction.DiscreteTraitOutcomeInfo{
				OutcomeName: predictedOutcomeName,
				PercentageOfLociTested: percentageOfLociTested,
				PercentageOfPhasedLoci: percentageOfPhasedLoci,
			}

			// If no entry exists yet, the map lookup returns a zero-valued statistics object
			traitAccuracyStatisticsValue := traitPredictionInfoMap[newTraitOutcomeInfo_PredictedOutcome]

			traitAccuracyStatisticsValue.QuantityOfPredictions += 1

			if (predictionIsCorrect == true){
				traitAccuracyStatisticsValue.QuantityOfCorrectOutcomePredictions += 1
			}

			traitPredictionInfoMap[newTraitOutcomeInfo_PredictedOutcome] = traitAccuracyStatisticsValue
		}

		exampleIndexString := helpers.ConvertIntToString(index+1)
		numberOfExamplesProgress := "Tested " + exampleIndexString + "/" + quantityOfTestingExamplesString + " Examples"

		progressDetailsBinding.Set(numberOfExamplesProgress)

		// When the testing set contains a single example, finalIndex is 0
		// We report full progress in that case, rather than dividing 0 by 0
		newProgressFloat64 := float64(1)
		if (finalIndex > 0){
			newProgressFloat64 = float64(index)/float64(finalIndex)
		}

		progressPercentageBinding.Set(newProgressFloat64)
	}

	// Now we construct the TraitAccuracyInfoMap
	// This map stores the accuracy for each outcome
	traitPredictionAccuracyInfoMap := make(map[geneticPrediction.DiscreteTraitOutcomeInfo]geneticPrediction.DiscreteTraitPredictionAccuracyInfo)

	for traitAccuracyData, value := range traitPredictionInfoMap{

		quantityOfExamples := value.QuantityOfExamples
		quantityOfPredictions := value.QuantityOfPredictions
		quantityOfCorrectGenomePredictions := value.QuantityOfCorrectGenomePredictions
		quantityOfCorrectOutcomePredictions := value.QuantityOfCorrectOutcomePredictions

		// These are sanity checks. A correct count can never exceed its total.
		if (quantityOfCorrectGenomePredictions > quantityOfExamples){
			return false, nil, errors.New("traitPredictionInfoMap contains quantityOfCorrectGenomePredictions > quantityOfExamples")
		}
		if (quantityOfCorrectOutcomePredictions > quantityOfPredictions){
			return false, nil, errors.New("traitPredictionInfoMap contains quantityOfCorrectOutcomePredictions > quantityOfPredictions")
		}

		newTraitPredictionAccuracyInfo := geneticPrediction.DiscreteTraitPredictionAccuracyInfo{
			QuantityOfExamples: quantityOfExamples,
			QuantityOfPredictions: quantityOfPredictions,
		}

		// Each probability is only filled in when its denominator is non-zero
		if (quantityOfExamples > 0){
			proportionOfCorrectGenomePredictions := float64(quantityOfCorrectGenomePredictions)/float64(quantityOfExamples)
			newTraitPredictionAccuracyInfo.ProbabilityOfCorrectGenomePrediction = int(100*proportionOfCorrectGenomePredictions)
		}
		if (quantityOfPredictions > 0){
			proportionOfCorrectOutcomePredictions := float64(quantityOfCorrectOutcomePredictions)/float64(quantityOfPredictions)
			newTraitPredictionAccuracyInfo.ProbabilityOfCorrectOutcomePrediction = int(100*proportionOfCorrectOutcomePredictions)
		}

		traitPredictionAccuracyInfoMap[traitAccuracyData] = newTraitPredictionAccuracyInfo
	}

	// Testing is complete.
	// We save the info map as a file in the ModelAccuracies folder

	fileBytes, err := geneticPrediction.EncodeDiscreteTraitPredictionAccuracyInfoMapToBytes(traitPredictionAccuracyInfoMap)
	if (err != nil) { return false, nil, err }

	_, err = localFilesystem.CreateFolder("./ModelAccuracies")
	if (err != nil) { return false, nil, err }

	modelAccuracyFilename := traitNameWithoutWhitespaces + "ModelAccuracy.gob"

	err = localFilesystem.CreateOrOverwriteFile(fileBytes, "./ModelAccuracies/", modelAccuracyFilename)
	if (err != nil) { return false, nil, err }

	progressPercentageBinding.Set(1)

	return true, traitPredictionAccuracyInfoMap, nil
}
processIsComplete, traitPredictionAccuracyInfoMap, err := testModel()
if (err != nil){
setErrorEncounteredPage(window, err, previousPage)
return
}
if (processIsComplete == false){
// User exited the page
return
}
setViewModelTestingDiscreteTraitResultsPage(window, traitName, traitPredictionAccuracyInfoMap, previousPage)
}
}
go testModelFunction()
}
// This is a page to view the details of testing for a specific trait's model
func setViewModelTestingTraitResultsPage(window fyne.Window, traitName string, traitAccuracyInfoMap geneticPrediction.TraitPredictionAccuracyInfoMap, exitPage func()){
func setViewModelTestingDiscreteTraitResultsPage(window fyne.Window, traitName string, traitAccuracyInfoMap geneticPrediction.DiscreteTraitPredictionAccuracyInfoMap, exitPage func()){
title := getBoldLabelCentered("Trait Prediction Accuracy Details")
title := getBoldLabelCentered("Discrete Trait Prediction Accuracy Details")
exitButton := getWidgetCentered(widget.NewButtonWithIcon("Exit", theme.CancelIcon(), exitPage))

View file

@ -32,7 +32,12 @@ func main(){
monogenicDiseases.InitializeMonogenicDiseaseVariables()
polygenicDiseases.InitializePolygenicDiseaseVariables()
traits.InitializeTraitVariables()
err = traits.InitializeTraitVariables()
if (err != nil) {
log.Println("InitializeTraitVariables failed: " + err.Error())
return
}
//Outputs:
// -bool: File exists

3
utilities/extractGiantLoci/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
GiantHeightStudy.txt
GiantHeightStudyLoci.gob
NewLocusMetadata.gob

View file

@ -0,0 +1,283 @@
// extractGiantLoci.go provides a function to extract rsIDs from a Genome-Wide Association Study (GWAS) created by the GIANT consortium
// These studies are released as files on this website:
// https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files
// Each file is a tab-delimited table of rsIDs and their effects on a particular trait
// The output file is a .gob encoded []int64 of the top 1000 most impactful loci on the trait.
// These files are then saved into /resources/geneticReferences/traits/rsIDs
// The loci metadata for loci from these files is also imported into the locusMetadata package to enable them to be used in Seekia
package main
// Here is the file I used to extract causal rsIDs for height
// Download link:
// https://portals.broadinstitute.org/collaboration/giant/images/4/4e/GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_ALL.gz
//SHA-256 Checksum:
// db18859724675f2f9ba86eff28cb4dacac0629c0b25c9806a6cf2eed6bb8b71e
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/resources/geneticReferences/modifyLocusMetadata"
import "seekia/internal/helpers"
import "seekia/internal/localFilesystem"
import "log"
import "bufio"
import "os"
import "io"
import "math"
import "bytes"
import "strings"
import "encoding/gob"
import "errors"
import "slices"
func main(){
extractGiantLoci := func()error{
fileBytes, err := os.ReadFile("./GiantHeightStudy.txt")
if (err != nil) {
return errors.New("Could not open GiantHeightStudy.txt file: " + err.Error())
}
fileReader := bytes.NewReader(fileBytes)
bufioReader := bufio.NewReader(fileReader)
// We first read the header line
//These are the columns of the file:
// COLUMN DESCRIPTION FOR FILE NAMED GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_*.gz
// - SNPID
// -(represented as CHR:POS:REF:ALT)
// - RSID
// -RS NUMBER, WHEN AVAILABLE
// - CHR
// -CHROMOSOME
// - The chromosome on which the SNP is located
// - POS
// -GENOMIC POSITION (BASE PAIR) - hg19/hg37 BUILD
// - EFFECT_ALLELE
// -The allele that is associated with the effect being studied (Example: the allele associated with increased height)
// - OTHER_ALLELE
// - EFFECT_ALLELE_FREQ
// -(3 significant figures)
// -ChatGPT says: The frequency of the effect allele in the study population, reported to 3 significant figures.
// - BETA
// -(6 significant figures)
// -ChatGPT says: The effect size or regression coefficient for the association between
// the SNP and the trait of interest, reported to 6 significant figures
// - SE
// -(3 significant figures)
// -Standard error of the effect size, reported to 3 significant figures
// - P
// -P-VALUE MARGINAL EFFECT
// -ChatGPT says: The p-value for the marginal effect of the SNP on the trait of interest
// - N
// -Total sample size used in the GWAS analysis
_, err = bufioReader.ReadString('\n')
if (err != nil) { return err }
type LocusInfo struct{
Chromosome int
Position int
Effect float64
}
rsidsInfoMap := make(map[int64]LocusInfo)
for {
rsidInfoLine, err := bufioReader.ReadString('\n')
if (err != nil) {
if (err == io.EOF){
// We have reached the end of the file
break
}
// File is corrupt
return errors.New("Error reading file: " + err.Error())
}
lineElementsSlice := strings.Split(string(rsidInfoLine), "\t")
rsidString := lineElementsSlice[1]
rsidChromosomeString := lineElementsSlice[2]
rsidPositionString := lineElementsSlice[3]
rsidEffectString := lineElementsSlice[7]
rsidWithoutPrefix, prefixFound := strings.CutPrefix(rsidString, "rs")
if (prefixFound == false){
// Some of the rsIDs are not formatted in the "rs123456" format
// We skip those
//log.Println("rs prefix not found in rsID: " + rsIDString)
continue
}
rsID, err := helpers.ConvertStringToInt64(rsidWithoutPrefix)
if (err != nil){
return errors.New("RSID is invalid: " + err.Error())
}
rsidChromosome, err := helpers.ConvertStringToInt(rsidChromosomeString)
if (err != nil){ return err }
rsidPosition, err := helpers.ConvertStringToInt(rsidPositionString)
if (err != nil){ return err }
rsidEffectRaw, err := helpers.ConvertStringToFloat64(rsidEffectString)
if (err != nil) {
if (rsidEffectString == ""){
// The database has at least 1 entry with no effect provided
continue
}
return err
}
// Effect can be negative, we make it positive
rsidEffect := math.Abs(rsidEffectRaw)
existingLocusValue, exists := rsidsInfoMap[rsID]
if (exists == false){
newLocusInfo := LocusInfo{
Chromosome: rsidChromosome,
Position: rsidPosition,
Effect: rsidEffect,
}
rsidsInfoMap[rsID] = newLocusInfo
} else {
// We see if the effect of this allele is greater
// If it is, we update the effect to match the higher effect allele
// We do this because we want the most causal rsIDs, not the most causal alleles
// When we feed the locus into the neural network, both alleles will be eligible to be trained upon
existingChromosome := existingLocusValue.Chromosome
existingPosition := existingLocusValue.Position
existingEffect := existingLocusValue.Effect
if (existingChromosome != rsidChromosome){
return errors.New("GIANT gwas contains two rsIDs with conflicting chromosomes.")
}
if (existingPosition != rsidPosition){
return errors.New("GIANT gwas contains two rsIDs with conflicting positions.")
}
if (existingEffect < rsidEffect){
// We update the value with the new effect
existingLocusValue.Effect = rsidEffect
rsidsInfoMap[rsID] = existingLocusValue
}
}
}
// We find the top 10,000 rsIDs with the greatest effect
rsidsList := helpers.GetListOfMapKeys(rsidsInfoMap)
compareFunction := func(rsid1 int64, rsid2 int64)int{
if (rsid1 == rsid2){
panic("Identical rsIDs found during sort.")
}
rsid1Info, exists := rsidsInfoMap[rsid1]
if (exists == false){
panic("rsid1 is missing from rsidsInfoMap.")
}
rsid2Info, exists := rsidsInfoMap[rsid2]
if (exists == false){
panic("rsid2 is missing from rsidsInfoMap.")
}
rsid1Effect := rsid1Info.Effect
rsid2Effect := rsid2Info.Effect
if (rsid1Effect == rsid2Effect){
return 0
}
if (rsid1Effect < rsid2Effect){
return 1
}
return -1
}
slices.SortFunc(rsidsList, compareFunction)
// We take the top 1000 most impactful loci
mostImpactfulLoci := rsidsList[:1000]
// We add these rsIDs to the locus metadata
locusMetadatasToAddList := make([]locusMetadata.LocusMetadata, 0)
for _, rsID := range mostImpactfulLoci{
locusInfo, exists := rsidsInfoMap[rsID]
if (exists == false){
return errors.New("rsidsInfoMap missing rsID.")
}
locusChromosome := locusInfo.Chromosome
locusPosition := locusInfo.Position
locusReferencesMap := map[string]string{
"Height Genome-Wide Association Study (GWAS) created by the GIANT consortium": "https://portals.broadinstitute.org/collaboration/giant/index.php/GIANT_consortium_data_files",
}
newLocusMetadata := locusMetadata.LocusMetadata{
RSIDsList: []int64{rsID},
Chromosome: locusChromosome,
Position: locusPosition,
GeneInfoIsKnown: false,
GeneExists: false,
GeneNamesList: make([]string, 0),
CompanyAliases: make(map[locusMetadata.GeneticsCompany][]string),
References: locusReferencesMap,
}
locusMetadatasToAddList = append(locusMetadatasToAddList, newLocusMetadata)
}
_, newLocusMetadataFileBytes, err := modifyLocusMetadata.AddLocusMetadata(locusMetadatasToAddList)
if (err != nil) { return err }
err = localFilesystem.CreateOrOverwriteFile(newLocusMetadataFileBytes, "./", "NewLocusMetadata.gob")
if (err != nil){ return err }
buffer := new(bytes.Buffer)
gobEncoder := gob.NewEncoder(buffer)
err = gobEncoder.Encode(mostImpactfulLoci)
if (err != nil) { return err }
encodedBytes := buffer.Bytes()
err = localFilesystem.CreateOrOverwriteFile(encodedBytes, "./", "GiantHeightStudyLoci.gob")
if (err != nil){ return err }
return nil
}
err := extractGiantLoci()
if (err != nil){
log.Println("Extraction failed: " + err.Error())
return
}
log.Println("Extraction successful!")
}

View file

@ -1 +1,2 @@
23andMeRawGenome.txt
NewLocusMetadata.gob

View file

@ -1,279 +1,127 @@
// importLocusMetadata.go provides a function to import locus metadata from raw genome files.
// It uses a 23andMe raw genome file to find the chromosomes and positions for new rsIDs.
// The imported loci will be missing the GeneNames list and any references.
// The imported loci may be missing locus aliases.
// The 23andMe file only contains Chromosome and Position information.
// This utility creates a NewLocusMetadata.gob file, which must be renamed to LocusMetadata.gob and placed in the /resources/geneticReferences/locusMetadata folder.
// TODO: Instead of using 23andMe files, use a better full-genome reference which has gene names.
package main
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/resources/geneticReferences/modifyLocusMetadata"
import "seekia/internal/genetics/readRawGenomes"
import "seekia/internal/helpers"
import "seekia/internal/localFilesystem"
import "encoding/json"
import "slices"
import "errors"
import "strings"
import "bufio"
import "bytes"
import "log"
func main(){
fileExists, fileBytes, err := localFilesystem.GetFileContents("./23andMeRawGenome.txt")
if (err != nil){
log.Println(err.Error())
return
}
if (fileExists == false){
log.Println("Error: 23AndMeRawGenome.txt does not exist.")
log.Println("You must add a 23andMe raw genome file to the addLocusMetadata folder so we can retrieve locus metadata from the file.")
return
}
importLocusMetadataFunction := func()error{
fileReader := bytes.NewReader(fileBytes)
fileBufioReader := bufio.NewReader(fileReader)
firstLine, err := fileBufioReader.ReadString('\n')
if (err != nil){
// File does not have another line
log.Println("Malformed 23andMe genome file: Too short.")
return
}
fileIs23andMe := strings.HasPrefix(firstLine, "# This data file generated by 23andMe at:")
if (fileIs23andMe == false){
log.Println("Malformed 23andMe genome file: Missing header.")
return
}
// Now we advance bufio reader to the snp rows
for {
fileLineString, err := fileBufioReader.ReadString('\n')
if (err != nil){
// File does not have another line
log.Println("Malformed 23andMe genome file: Too short.")
return
fileExists, fileBytes, err := localFilesystem.GetFileContents("./23andMeRawGenome.txt")
if (err != nil){ return err }
if (fileExists == false){
return errors.New("Error: 23AndMeRawGenome.txt does not exist. You must add a 23andMe raw genome file to the importLocusMetadata folder so we can retrieve locus metadata from the file.")
}
// All SNP rows come after this line:
// "# rsid chromosome position genotype"
lineReached := strings.HasPrefix(fileLineString, "# rsid")
if (lineReached == true){
break
}
}
fileReader := bytes.NewReader(fileBytes)
type LocusInfoObject struct{
Chromosome int
Position int
}
// Map structure: Locus rsID -> Info Object
lociInfoMap := make(map[int64]LocusInfoObject)
for {
fileLineString, err := fileBufioReader.ReadString('\n')
if (err != nil){
// File does not have another line
break
}
if (fileLineString == "\n"){
// This is the final line
break
ableToReadFile, locusLocationsMap, err := readRawGenomes.ReadRawGenomeFileLocusLocations(fileReader)
if (err != nil){ return err }
if (ableToReadFile == false){
return errors.New("Unable to read 23andMe file.")
}
fileLineWithoutNewline := strings.TrimSuffix(fileLineString, "\n")
// This is a list of rsIDs whose metadata we should add to the locus metadata
lociToAddList := []int64{}
// Rows look like this
// "rs4477212 1 82154 GG"
// "rs571313759 1 1181945 --" (-- means no entry)
// "i3001920 MT 16470 G" (one base is possible)
rowSlice := strings.Split(fileLineWithoutNewline, "\t")
if (len(rowSlice) != 4){
log.Println("Malformed 23andMe genome data: Invalid SNP row: " + fileLineString)
return
containsDuplicates, _ := helpers.CheckIfListContainsDuplicates(lociToAddList)
if (containsDuplicates == true){
return errors.New("lociToAddList contains duplicates.")
}
locusIdentifierString := rowSlice[0]
locusChromosomeString := rowSlice[1]
locusPositionString := rowSlice[2]
// This list will store the loci for which no metadata existed
missingLociList := make([]int64, 0)
//Outputs:
// -bool: rsID found
// -int64: rsID value
getRSIDIdentifier := func()(bool, int64){
// This is a list of locus metadata objects to add
locusMetadatasToAddList := make([]locusMetadata.LocusMetadata, 0)
stringWithoutPrefix, prefixExists := strings.CutPrefix(locusIdentifierString, "rs")
if (prefixExists == false){
return false, 0
for _, rsID := range lociToAddList{
locusLocationObject, exists := locusLocationsMap[rsID]
if (exists == false){
// The 23andMe file does not contain metadata for this locus
missingLociList = append(missingLociList, rsID)
continue
}
rsidInt64, err := helpers.ConvertStringToInt64(stringWithoutPrefix)
if (err != nil){
return false, 0
locusChromosome := locusLocationObject.Chromosome
locusPosition := locusLocationObject.Position
newLocusMetadataObject := locusMetadata.LocusMetadata{
RSIDsList: []int64{rsID},
Chromosome: locusChromosome,
Position: locusPosition,
GeneInfoIsKnown: false,
GeneExists: false,
GeneNamesList: make([]string, 0),
CompanyAliases: make(map[locusMetadata.GeneticsCompany][]string),
References: make(map[string]string),
}
return true, rsidInt64
locusMetadatasToAddList = append(locusMetadatasToAddList, newLocusMetadataObject)
}
isRSID, locusRSID := getRSIDIdentifier()
if (isRSID == false){
// RSID is unknown.
// It is probably a custom identifier (Example: i713426)
continue
quantityOfAddedLoci, newLocusMetadataFileBytes, err := modifyLocusMetadata.AddLocusMetadata(locusMetadatasToAddList)
if (err != nil) { return err }
err = localFilesystem.CreateOrOverwriteFile(newLocusMetadataFileBytes, "./", "NewLocusMetadata.gob")
if (err != nil){ return err }
totalLociToAdd := len(lociToAddList)
totalLociToAddString := helpers.ConvertIntToString(totalLociToAdd)
numberOfImportedLociString := helpers.ConvertIntToString(quantityOfAddedLoci)
log.Println("Successfully imported " + numberOfImportedLociString + "/" + totalLociToAddString + " locus metadatas!")
numberOfMissingLoci := len(missingLociList)
numberOfMissingLociString := helpers.ConvertIntToString(numberOfMissingLoci)
log.Println(numberOfMissingLociString + " loci contained no metadata in the 23andMe genome file.")
if (len(missingLociList) > 0){
missingLociStringsList := make([]string, 0, len(missingLociList))
for _, rsID := range missingLociList{
rsIDString := helpers.ConvertInt64ToString(rsID)
missingLociStringsList = append(missingLociStringsList, rsIDString)
}
missingLociListFormatted := strings.Join(missingLociStringsList, ", ")
log.Println("Missing loci list: " + missingLociListFormatted)
}
locusChromosome, err := helpers.ConvertStringToInt(locusChromosomeString)
if (err != nil){
// It is probably "MT" or "X" chromosome
continue
}
locusPosition, err := helpers.ConvertStringToInt(locusPositionString)
if (err != nil){
log.Println("23andMe file is malformed: Contains invalid locusPosition: " + locusPositionString)
return
}
locusInfoObject := LocusInfoObject{
Chromosome: locusChromosome,
Position: locusPosition,
}
lociInfoMap[locusRSID] = locusInfoObject
return nil
}
// This is a list of rsIDs whose metadata we should add to the locus metadata
lociToAddList := []int64{}
containsDuplicates, _ := helpers.CheckIfListContainsDuplicates(lociToAddList)
if (containsDuplicates == true){
log.Println("lociToAddList contains duplicates.")
return
}
err = locusMetadata.InitializeLocusMetadataVariables()
err := importLocusMetadataFunction()
if (err != nil){
log.Println("ERROR: " + err.Error())
return
}
// This list will store the loci for which no metadata existed
missingLociList := make([]int64, 0)
// Map Structure: Chromosome -> List of locus metadata objects to add
lociToAddMap := make(map[int][]locusMetadata.LocusMetadata)
numberOfImportedLoci := 0
for _, rsID := range lociToAddList{
// First we check to see if locus metadata already exists
exists, _, err := locusMetadata.GetLocusMetadata(rsID)
if (err != nil){
log.Println("ERROR: " + err.Error())
return
}
if (exists == true){
log.Println("lociToAddList contains locus whose metadata already exists.")
return
}
locusInfoObject, exists := lociInfoMap[rsID]
if (exists == false){
// The 23andMe file does not contain metadata for this locus
missingLociList = append(missingLociList, rsID)
continue
}
numberOfImportedLoci += 1
locusChromosome := locusInfoObject.Chromosome
locusPosition := locusInfoObject.Position
newLocusMetadataObject := locusMetadata.LocusMetadata{
RSIDsList: []int64{rsID},
Chromosome: locusChromosome,
Position: locusPosition,
GeneNamesList: []string{"MISSING"},
CompanyAliases: make(map[locusMetadata.GeneticsCompany][]string),
References: make(map[string]string),
}
existingList, exists := lociToAddMap[locusChromosome]
if (exists == false){
lociToAddMap[locusChromosome] = []locusMetadata.LocusMetadata{newLocusMetadataObject}
} else {
existingList = append(existingList, newLocusMetadataObject)
lociToAddMap[locusChromosome] = existingList
}
}
for chromosomeInt, locusMetadataObjectsToAddList := range lociToAddMap{
existingLocusMetadataObjectsList, err := locusMetadata.GetLocusMetadataObjectsListByChromosome(chromosomeInt)
if (err != nil) {
log.Println(err)
return
}
newLocusMetadataObjectsList := slices.Concat(existingLocusMetadataObjectsList, locusMetadataObjectsToAddList)
newChromosomeFileBytes, err := json.MarshalIndent(newLocusMetadataObjectsList, "", "\t")
if (err != nil){
log.Println("ERROR: " + err.Error())
return
}
currentChromosomeString := helpers.ConvertIntToString(chromosomeInt)
locusMetadataFilepath := "../../resources/geneticReferences/locusMetadata/"
err = localFilesystem.CreateOrOverwriteFile(newChromosomeFileBytes, locusMetadataFilepath, "LocusMetadata_Chromosome" + currentChromosomeString + ".json")
if (err != nil){
log.Println("ERROR: " + err.Error())
return
}
}
totalLociToAdd := len(lociToAddList)
totalLociToAddString := helpers.ConvertIntToString(totalLociToAdd)
numberOfImportedLociString := helpers.ConvertIntToString(numberOfImportedLoci)
log.Println("Successfully imported " + numberOfImportedLociString + "/" + totalLociToAddString + " locus metadatas!")
numberOfMissingLoci := len(missingLociList)
numberOfMissingLociString := helpers.ConvertIntToString(numberOfMissingLoci)
log.Println(numberOfMissingLociString + " loci contained no metadata in the 23andMe genome file.")
if (len(missingLociList) == 0){
return
}
missingLociStringsList := make([]string, 0, len(missingLociList))
for _, rsID := range missingLociList{
rsIDString := helpers.ConvertInt64ToString(rsID)
missingLociStringsList = append(missingLociStringsList, rsIDString)
}
missingLociListFormatted := strings.Join(missingLociStringsList, ", ")
log.Println("Missing loci list: " + missingLociListFormatted)
}