2024-04-11 15:51:56 +02:00
|
|
|
|
|
|
|
// Package verifyGeneticReferences provides functions to run a check to make sure the genetic resources are valid and have no conflicts
|
|
|
|
|
|
|
|
package verifyGeneticReferences
|
|
|
|
|
|
|
|
// We check to make sure:
|
|
|
|
// 1. No identifier collisions exist
|
|
|
|
// 2. No disease/trait name collisions exist
|
|
|
|
// 4. Verifies the minimum and maximum risk weights for each polygenic disease locus
|
|
|
|
// 5. Each identifier is the correct format (3 bytes encoded hex)
|
|
|
|
|
|
|
|
// Identifiers are 3 bytes/24 bits long, so there is at least a 1 in 16 million chance that two will collide when generating them randomly
|
|
|
|
|
|
|
|
import "seekia/resources/geneticReferences/locusMetadata"
|
|
|
|
import "seekia/resources/geneticReferences/monogenicDiseases"
|
|
|
|
import "seekia/resources/geneticReferences/polygenicDiseases"
|
|
|
|
import "seekia/resources/geneticReferences/traits"
|
|
|
|
|
|
|
|
import "seekia/internal/helpers"
|
|
|
|
import "seekia/internal/encoding"
|
|
|
|
|
|
|
|
import "testing"
|
|
|
|
import "strings"
|
|
|
|
import "slices"
|
|
|
|
|
|
|
|
func TestGeneticReferences(t *testing.T){
|
|
|
|
|
|
|
|
verifyIdentifier := func(inputIdentifier string)bool{
|
|
|
|
|
2024-06-02 10:43:39 +02:00
|
|
|
_, err := encoding.DecodeHexStringTo3ByteArray(inputIdentifier)
|
2024-04-11 15:51:56 +02:00
|
|
|
if (err != nil) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
verifyBase := func(inputBase string)bool{
|
|
|
|
|
|
|
|
if (inputBase != "A" && inputBase != "G" && inputBase != "C" && inputBase != "T" && inputBase != "I" && inputBase != "D"){
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
verifyBasePair := func(inputBasePair string)bool{
|
|
|
|
|
|
|
|
baseA, baseB, delimiterFound := strings.Cut(inputBasePair, ";")
|
|
|
|
if (delimiterFound == false){
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
baseIsValid := verifyBase(baseA)
|
|
|
|
if (baseIsValid == false){
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
baseIsValid = verifyBase(baseB)
|
|
|
|
if (baseIsValid == false){
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
verifyReferencesMap := func(inputReferencesMap map[string]string)bool{
|
|
|
|
|
|
|
|
if (len(inputReferencesMap) == 0){
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
for referenceName, referenceLink := range inputReferencesMap{
|
|
|
|
|
|
|
|
if (referenceName == ""){
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if (referenceLink == ""){
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
|
|
|
|
|
|
|
monogenicDiseasesObjectsList, err := monogenicDiseases.GetMonogenicDiseaseObjectsList()
|
|
|
|
if (err != nil){
|
|
|
|
t.Fatalf("Failed to get monogenic disease objects list: " + err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
// We use this map to make sure all RSIDs have metadata in locusMetadata
|
|
|
|
allRSIDsMap := make(map[int64]struct{})
|
|
|
|
|
|
|
|
allIdentifiersMap := make(map[string]struct{})
|
|
|
|
|
|
|
|
monogenicDiseaseNamesMap := make(map[string]struct{})
|
|
|
|
|
|
|
|
for _, diseaseObject := range monogenicDiseasesObjectsList{
|
|
|
|
|
|
|
|
diseaseName := diseaseObject.DiseaseName
|
|
|
|
diseaseGeneName := diseaseObject.GeneName
|
|
|
|
dominantOrRecessive := diseaseObject.DominantOrRecessive
|
|
|
|
variantsList := diseaseObject.VariantsList
|
|
|
|
diseaseReferencesMap := diseaseObject.References
|
|
|
|
|
|
|
|
if (diseaseName == ""){
|
|
|
|
t.Fatalf("Monogenic Disease name is empty.")
|
|
|
|
}
|
|
|
|
_, exists := monogenicDiseaseNamesMap[diseaseName]
|
|
|
|
if (exists == true){
|
|
|
|
t.Fatalf("Monogenic Disease name collision found: " + diseaseName)
|
|
|
|
}
|
|
|
|
monogenicDiseaseNamesMap[diseaseName] = struct{}{}
|
|
|
|
|
|
|
|
// Monogenic disease names cannot contain underscores
|
|
|
|
// This is because when we encode monogenic disease names in user profiles, we replace the whitespace with underscores
|
|
|
|
// We have to be able to reliably undo this
|
|
|
|
containsUnderscore := strings.Contains(diseaseName, "_")
|
|
|
|
if (containsUnderscore == true){
|
|
|
|
t.Fatalf("Monogenic Disease name contains underscore: " + diseaseName)
|
|
|
|
}
|
|
|
|
|
|
|
|
if (diseaseGeneName == ""){
|
|
|
|
t.Fatalf("Monogenic Disease gene name is empty: " + diseaseName)
|
|
|
|
}
|
|
|
|
if (dominantOrRecessive != "Dominant" && dominantOrRecessive != "Recessive"){
|
|
|
|
t.Fatalf("Monogenic Disease dominantOrRecessive is invalid: " + diseaseName)
|
|
|
|
}
|
|
|
|
|
|
|
|
referencesAreValid := verifyReferencesMap(diseaseReferencesMap)
|
|
|
|
if (referencesAreValid == false){
|
|
|
|
t.Fatalf("Monogenic Disease references are invalid: " + diseaseName)
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len(variantsList) == 0){
|
|
|
|
t.Fatalf("Monogenic Disease contains no variants: " + diseaseName)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, variantObject := range variantsList{
|
|
|
|
|
|
|
|
variantIdentifier := variantObject.VariantIdentifier
|
|
|
|
variantRSID := variantObject.VariantRSID
|
|
|
|
variantNamesList := variantObject.VariantNames
|
|
|
|
variantHealthyBase := variantObject.HealthyBase
|
|
|
|
variantDefectiveBase := variantObject.DefectiveBase
|
|
|
|
variantReferences := variantObject.References
|
|
|
|
|
|
|
|
allRSIDsMap[variantRSID] = struct{}{}
|
|
|
|
|
|
|
|
identifierIsValid := verifyIdentifier(variantIdentifier)
|
|
|
|
if (identifierIsValid == false){
|
|
|
|
t.Fatalf(diseaseName + " Invalid variant identifier found: " + variantIdentifier)
|
|
|
|
}
|
|
|
|
|
|
|
|
_, exists := allIdentifiersMap[variantIdentifier]
|
|
|
|
if (exists == true){
|
|
|
|
t.Fatalf(diseaseName + " Duplicate variant identifier found: " + variantIdentifier)
|
|
|
|
}
|
|
|
|
allIdentifiersMap[variantIdentifier] = struct{}{}
|
|
|
|
|
|
|
|
if (len(variantNamesList) == 0){
|
|
|
|
t.Fatalf("Variant names list is empty: " + variantIdentifier)
|
|
|
|
}
|
|
|
|
for _, variantName := range variantNamesList{
|
|
|
|
if (variantName == ""){
|
|
|
|
t.Fatalf("Variant name is empty: " + variantIdentifier)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
healthyBaseIsValid := verifyBase(variantHealthyBase)
|
|
|
|
defectiveBaseIsValid := verifyBase(variantDefectiveBase)
|
|
|
|
|
|
|
|
if (healthyBaseIsValid == false || defectiveBaseIsValid == false){
|
|
|
|
t.Fatalf(diseaseName + " Invalid healthy/defective base found: " + variantIdentifier)
|
|
|
|
}
|
|
|
|
|
|
|
|
if (variantHealthyBase == variantDefectiveBase){
|
|
|
|
t.Fatalf(diseaseName + " Identical healthy/defective bases found: " + variantIdentifier)
|
|
|
|
}
|
|
|
|
|
|
|
|
referencesAreValid := verifyReferencesMap(variantReferences)
|
|
|
|
if (referencesAreValid == false){
|
|
|
|
t.Fatalf("Disease variant references map is invalid: " + variantIdentifier)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-08-14 05:37:18 +02:00
|
|
|
err = polygenicDiseases.InitializePolygenicDiseaseVariables()
|
|
|
|
if (err != nil){
|
|
|
|
t.Fatalf("InitializePolygenicDiseaseVariables failed: " + err.Error())
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
|
|
|
|
polygenicDiseaseObjectsList, err := polygenicDiseases.GetPolygenicDiseaseObjectsList()
|
|
|
|
if (err != nil) {
|
|
|
|
t.Fatalf("Failed to get polygenicDisease objects list: " + err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
polygenicDiseaseNamesMap := make(map[string]struct{})
|
|
|
|
|
|
|
|
for _, diseaseObject := range polygenicDiseaseObjectsList{
|
|
|
|
|
|
|
|
diseaseName := diseaseObject.DiseaseName
|
|
|
|
diseaseDescription := diseaseObject.DiseaseDescription
|
|
|
|
diseaseEffectedSex := diseaseObject.EffectedSex
|
2024-08-13 15:25:47 +02:00
|
|
|
diseaseLocusReferencesMap := diseaseObject.LocusReferencesMap
|
2024-04-11 15:51:56 +02:00
|
|
|
diseaseLociList := diseaseObject.LociList
|
|
|
|
diseaseReferencesMap := diseaseObject.References
|
|
|
|
|
|
|
|
if (diseaseName == ""){
|
|
|
|
t.Fatalf("PolygenicDisease name is empty.")
|
|
|
|
}
|
|
|
|
_, exists := polygenicDiseaseNamesMap[diseaseName]
|
|
|
|
if (exists == true){
|
|
|
|
t.Fatalf("PolygenicDisease name collision found: " + diseaseName)
|
|
|
|
}
|
|
|
|
polygenicDiseaseNamesMap[diseaseName] = struct{}{}
|
|
|
|
|
|
|
|
if (diseaseDescription == ""){
|
|
|
|
t.Fatalf("PolygenicDisease description is empty for disease: " + diseaseName)
|
|
|
|
}
|
|
|
|
if (diseaseEffectedSex != "Male" && diseaseEffectedSex != "Female" && diseaseEffectedSex != "Both"){
|
|
|
|
t.Fatalf("PolygenicDisease effected sex is invalid: " + diseaseEffectedSex)
|
|
|
|
}
|
|
|
|
|
2024-08-13 15:25:47 +02:00
|
|
|
for rsID, referencesMap := range diseaseLocusReferencesMap{
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-08-13 15:25:47 +02:00
|
|
|
containsItem := slices.Contains(diseaseLociList, rsID)
|
|
|
|
if (containsItem == false){
|
|
|
|
t.Fatalf("Polygenic disease diseaseLocusReferencesMap contains disease locus that is not inside of the disease's loci list.")
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
|
|
|
|
2024-08-13 15:25:47 +02:00
|
|
|
allRSIDsMap[rsID] = struct{}{}
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-08-13 15:25:47 +02:00
|
|
|
referencesAreValid := verifyReferencesMap(referencesMap)
|
|
|
|
if (referencesAreValid == false){
|
|
|
|
t.Fatalf("PolygenicDisease references map is invalid for disease locus.")
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
2024-08-13 15:25:47 +02:00
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-08-13 15:25:47 +02:00
|
|
|
containsDuplicates, _ := helpers.CheckIfListContainsDuplicates(diseaseLociList)
|
|
|
|
if (containsDuplicates == true){
|
|
|
|
t.Fatalf("Polygenic disease object contains diseaseLociList with duplicate rsIDs.")
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-08-13 15:25:47 +02:00
|
|
|
if (len(diseaseLocusReferencesMap) > len(diseaseLociList)){
|
|
|
|
t.Fatalf("Polygenic disease contains locus references map that is longer than the diseaseLociList")
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-08-13 15:25:47 +02:00
|
|
|
referencesAreValid := verifyReferencesMap(diseaseReferencesMap)
|
|
|
|
if (referencesAreValid == false){
|
|
|
|
t.Fatalf("PolygenicDisease references map is invalid for disease: " + diseaseName)
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-08-05 09:11:10 +02:00
|
|
|
err = traits.InitializeTraitVariables()
|
|
|
|
if (err != nil){
|
|
|
|
t.Fatalf("InitializeTraitVariables failed: " + err.Error())
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
|
|
|
|
traitObjectsList, err := traits.GetTraitObjectsList()
|
|
|
|
if (err != nil){
|
|
|
|
t.Fatalf("Failed to get trait objects list: " + err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
traitNamesMap := make(map[string]struct{})
|
|
|
|
|
|
|
|
for _, traitObject := range traitObjectsList{
|
|
|
|
|
|
|
|
traitName := traitObject.TraitName
|
|
|
|
traitDescription := traitObject.TraitDescription
|
2024-07-19 19:16:28 +02:00
|
|
|
traitDiscreteOrNumeric := traitObject.DiscreteOrNumeric
|
|
|
|
traitLocusReferencesMap := traitObject.LocusReferencesMap
|
2024-04-11 15:51:56 +02:00
|
|
|
traitLociList := traitObject.LociList
|
2024-07-19 19:16:28 +02:00
|
|
|
traitLociList_Rules := traitObject.LociList_Rules
|
2024-04-11 15:51:56 +02:00
|
|
|
traitRulesList := traitObject.RulesList
|
|
|
|
traitOutcomesList := traitObject.OutcomesList
|
2024-07-19 19:16:28 +02:00
|
|
|
traitReferencesMap := traitObject.ReferencesMap
|
2024-04-11 15:51:56 +02:00
|
|
|
|
|
|
|
if (traitName == ""){
|
|
|
|
t.Fatalf("Empty trait name exists.")
|
|
|
|
}
|
|
|
|
_, exists := traitNamesMap[traitName]
|
|
|
|
if (exists == true){
|
|
|
|
t.Fatalf("Duplicate trait name exists: " + traitName)
|
|
|
|
}
|
|
|
|
traitNamesMap[traitName] = struct{}{}
|
|
|
|
|
|
|
|
if (traitDescription == ""){
|
|
|
|
t.Fatalf("Empty trait description exists for trait: " + traitName)
|
|
|
|
}
|
2024-07-19 19:16:28 +02:00
|
|
|
if (traitDiscreteOrNumeric != "Discrete" && traitDiscreteOrNumeric != "Numeric"){
|
|
|
|
t.Fatalf("Invalid DiscreteOrNumeric for trait: " + traitDiscreteOrNumeric)
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
if (len(traitOutcomesList) != 0){
|
|
|
|
|
|
|
|
if (len(traitOutcomesList) < 2){
|
|
|
|
t.Fatalf("Not enough trait outcomes for trait: " + traitName)
|
|
|
|
}
|
|
|
|
for _, traitOutcome := range traitOutcomesList{
|
|
|
|
if (traitOutcome == ""){
|
|
|
|
t.Fatalf("Empty trait outcome exists for trait: " + traitName)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
|
|
|
|
// If there are no outcomes, then no rules can exist
|
|
|
|
if (len(traitRulesList) != 0){
|
|
|
|
t.Fatalf("Trait outcomes list is empty, trait rules list is not.")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
referencesAreValid := verifyReferencesMap(traitReferencesMap)
|
|
|
|
if (referencesAreValid == false){
|
|
|
|
t.Fatalf("Invalid references exist for trait: " + traitName)
|
|
|
|
}
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
if (len(traitLocusReferencesMap) == 0){
|
|
|
|
t.Fatalf("No trait locus references exist for trait: " + traitName)
|
|
|
|
}
|
|
|
|
|
|
|
|
for locusRSID, locusReferences := range traitLocusReferencesMap{
|
|
|
|
|
|
|
|
allRSIDsMap[locusRSID] = struct{}{}
|
|
|
|
|
|
|
|
if (locusReferences == nil){
|
|
|
|
t.Fatalf("A trait locus has no references map: " + traitName)
|
|
|
|
}
|
|
|
|
if (len(locusReferences) == 0){
|
|
|
|
t.Fatalf("A trait locus has no references: " + traitName)
|
|
|
|
}
|
|
|
|
|
|
|
|
locusExists := slices.Contains(traitLociList, locusRSID)
|
|
|
|
if (locusExists == false){
|
|
|
|
t.Fatalf("traitLocusReferencesMap contains rsID which does not exist in traitLociList")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-04-11 15:51:56 +02:00
|
|
|
if (len(traitLociList) == 0){
|
|
|
|
t.Fatalf("No trait loci exist for trait: " + traitName)
|
|
|
|
}
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
for _, rsID := range traitLociList{
|
|
|
|
allRSIDsMap[rsID] = struct{}{}
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
for _, rsID := range traitLociList_Rules{
|
|
|
|
|
|
|
|
locusExists := slices.Contains(traitLociList, rsID)
|
|
|
|
if (locusExists == false){
|
|
|
|
t.Fatalf("traitLociList_Rules contains locus not present in traitLociList")
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (len(traitRulesList) == 0){
|
|
|
|
// No rules exist.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, ruleObject := range traitRulesList{
|
|
|
|
|
|
|
|
ruleIdentifier := ruleObject.RuleIdentifier
|
|
|
|
ruleLociList := ruleObject.LociList
|
|
|
|
ruleOutcomePointsMap := ruleObject.OutcomePointsMap
|
2024-07-19 19:16:28 +02:00
|
|
|
ruleReferencesMap := ruleObject.ReferencesMap
|
2024-04-11 15:51:56 +02:00
|
|
|
|
|
|
|
identifierIsValid := verifyIdentifier(ruleIdentifier)
|
|
|
|
if (identifierIsValid == false){
|
|
|
|
t.Fatalf("Invalid identifier exists: " + ruleIdentifier)
|
|
|
|
}
|
|
|
|
_, exists := allIdentifiersMap[ruleIdentifier]
|
|
|
|
if (exists == true){
|
|
|
|
t.Fatalf("Duplicate identifier exists: " + ruleIdentifier)
|
|
|
|
}
|
|
|
|
allIdentifiersMap[ruleIdentifier] = struct{}{}
|
|
|
|
|
|
|
|
if (len(ruleOutcomePointsMap) == 0){
|
|
|
|
t.Fatalf("Rule contains empty rule outcome points map: " + ruleIdentifier)
|
|
|
|
}
|
|
|
|
|
|
|
|
for outcomeName, _ := range ruleOutcomePointsMap{
|
|
|
|
isValid := slices.Contains(traitOutcomesList, outcomeName)
|
|
|
|
if (isValid == false){
|
|
|
|
t.Fatalf("Rule outcome points map contains invalid outcome: " + outcomeName)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len(ruleLociList) == 0){
|
|
|
|
t.Fatalf("Rule contains empty rule loci list: " + ruleIdentifier)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, locusObject := range ruleLociList{
|
|
|
|
|
|
|
|
locusIdentifier := locusObject.LocusIdentifier
|
|
|
|
locusRSID := locusObject.LocusRSID
|
|
|
|
locusBasePairsList := locusObject.BasePairsList
|
|
|
|
|
|
|
|
allRSIDsMap[locusRSID] = struct{}{}
|
|
|
|
|
|
|
|
isValid := verifyIdentifier(locusIdentifier)
|
|
|
|
if (isValid == false){
|
|
|
|
t.Fatalf("Trait rule Locus identifier is invalid: " + locusIdentifier)
|
|
|
|
}
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
_, mapContainsItem := traitLocusReferencesMap[locusRSID]
|
|
|
|
if (mapContainsItem == false){
|
|
|
|
t.Fatalf("Rule locus contains rsid which is not contained within LocusReferencesMap.")
|
|
|
|
}
|
|
|
|
|
|
|
|
sliceContainsItem := slices.Contains(traitLociList, locusRSID)
|
|
|
|
if (sliceContainsItem == false){
|
2024-04-11 15:51:56 +02:00
|
|
|
t.Fatalf("Rule locus contains rsid which is not contained within traitLociList.")
|
|
|
|
}
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
sliceContainsItem = slices.Contains(traitLociList_Rules, locusRSID)
|
|
|
|
if (sliceContainsItem == false){
|
|
|
|
t.Fatalf("Rule locus contains rsid which is not contained within traitLociList_Rules.")
|
|
|
|
}
|
|
|
|
|
2024-04-11 15:51:56 +02:00
|
|
|
if (len(locusBasePairsList) == 0){
|
|
|
|
t.Fatalf("Trait rule locus base pairs list is empty: " + locusIdentifier)
|
|
|
|
}
|
|
|
|
for _, locusBasePair := range locusBasePairsList{
|
|
|
|
|
|
|
|
basePairIsValid := verifyBasePair(locusBasePair)
|
|
|
|
if (basePairIsValid == false){
|
|
|
|
t.Fatalf("Rule Locus base pairs list contains invalid base pair: " + locusBasePair)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
referencesAreValid := verifyReferencesMap(ruleReferencesMap)
|
2024-04-11 15:51:56 +02:00
|
|
|
if (referencesAreValid == false){
|
|
|
|
t.Fatalf("Invalid references map for trait rule locus: " + ruleIdentifier)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
err = locusMetadata.InitializeLocusMetadataVariables()
|
|
|
|
if (err != nil){
|
|
|
|
t.Fatalf("Failed to initialize locus metadata variables: " + err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
locusMetadataObjectsList, err := locusMetadata.GetLocusMetadataObjectsList()
|
|
|
|
if (err != nil){
|
|
|
|
t.Fatalf("GetLocusMetadataObjectsList failed: " + err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
// We use the locusPositionsMap to make sure there are no locations that refer to the same position on the same chromosome
|
|
|
|
|
|
|
|
type locusPositionStruct struct{
|
|
|
|
chromosome int
|
|
|
|
position int
|
|
|
|
}
|
|
|
|
|
|
|
|
locusPositionsMap := make(map[locusPositionStruct]struct{})
|
|
|
|
|
|
|
|
// We use the companyAliasesMap to make sure there are no company alias collisions.
|
|
|
|
//
|
|
|
|
// We only care about alias collisions within each company.
|
|
|
|
// Multiple companies can refer to the same location with the same alias.
|
|
|
|
|
|
|
|
type companyAliasStruct struct{
|
|
|
|
|
|
|
|
geneticsCompany locusMetadata.GeneticsCompany
|
|
|
|
locusAlias string
|
|
|
|
}
|
|
|
|
|
|
|
|
companyAliasesMap := make(map[companyAliasStruct]struct{})
|
|
|
|
|
|
|
|
// We use this map to make sure that locus metadata rsIDs do not collide.
|
|
|
|
// We don't want any duplicate rsIDs within any of the loci.
|
|
|
|
locusMetadataRSIDsMap := make(map[int64]struct{})
|
|
|
|
|
|
|
|
for _, locusMetadataObject := range locusMetadataObjectsList{
|
|
|
|
|
|
|
|
rsidsList := locusMetadataObject.RSIDsList
|
|
|
|
locusChromosome := locusMetadataObject.Chromosome
|
|
|
|
locusPosition := locusMetadataObject.Position
|
2024-08-05 09:11:10 +02:00
|
|
|
geneInfoIsKnown := locusMetadataObject.GeneInfoIsKnown
|
|
|
|
geneExists := locusMetadataObject.GeneExists
|
2024-04-11 15:51:56 +02:00
|
|
|
geneNamesList := locusMetadataObject.GeneNamesList
|
|
|
|
locusCompanyAliasesMap := locusMetadataObject.CompanyAliases
|
|
|
|
referencesMap := locusMetadataObject.References
|
|
|
|
|
|
|
|
if (len(rsidsList) == 0){
|
|
|
|
t.Fatalf("locusMetadataObjectsList contains locus with empty RSIDs list.")
|
|
|
|
}
|
|
|
|
|
|
|
|
// The primary RSID is the only rsID which should appear in the genetic references
|
|
|
|
// The primary RSID is the first rsID in the locus rsIDs list
|
|
|
|
primaryRSID := rsidsList[0]
|
|
|
|
|
|
|
|
_, exists := allRSIDsMap[primaryRSID]
|
|
|
|
if (exists == false){
|
|
|
|
t.Fatalf("locusMetadataObjectsList contains unnecessary locus: No matching rsids exist.")
|
|
|
|
}
|
|
|
|
|
|
|
|
for index, rsID := range rsidsList{
|
|
|
|
|
|
|
|
_, exists := locusMetadataRSIDsMap[rsID]
|
|
|
|
if (exists == true){
|
|
|
|
|
2024-08-05 09:11:10 +02:00
|
|
|
rsidString := helpers.ConvertInt64ToString(rsID)
|
|
|
|
t.Fatalf("locusMetadataObjectsList contains duplicate RSID: " + rsidString)
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
locusMetadataRSIDsMap[rsID] = struct{}{}
|
|
|
|
|
|
|
|
if (index != 0){
|
|
|
|
|
|
|
|
// This is not a primary rsID
|
|
|
|
_, exists = allRSIDsMap[rsID]
|
|
|
|
if (exists == true){
|
|
|
|
rsIDString := helpers.ConvertInt64ToString(rsID)
|
|
|
|
t.Fatalf("allRSIDsMap contains non-primary rsID: " + rsIDString)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (locusChromosome == 0){
|
|
|
|
// 0 is uninitialized.
|
|
|
|
t.Fatalf("locusMetadataObjectsList contains locus with 0 chromosome.")
|
|
|
|
}
|
|
|
|
|
|
|
|
if (locusPosition == 0){
|
|
|
|
// 0 is uninitialized.
|
|
|
|
t.Fatalf("locusMetadataObjectsList contains locus with 0 position.")
|
|
|
|
}
|
|
|
|
|
|
|
|
locusPositionObject := locusPositionStruct{
|
|
|
|
chromosome: locusChromosome,
|
|
|
|
position: locusPosition,
|
|
|
|
}
|
|
|
|
|
|
|
|
_, exists = locusPositionsMap[locusPositionObject]
|
|
|
|
if (exists == true){
|
|
|
|
t.Fatalf("locusMetadataObjectsList contains locus position collision.")
|
|
|
|
}
|
|
|
|
|
|
|
|
locusPositionsMap[locusPositionObject] = struct{}{}
|
|
|
|
|
2024-08-05 09:11:10 +02:00
|
|
|
if (geneInfoIsKnown == true && geneExists == true){
|
|
|
|
|
|
|
|
if (len(geneNamesList) == 0){
|
|
|
|
t.Fatalf("locusMetadataObjectsList contains locus with known gene and empty geneNamesList.")
|
|
|
|
}
|
|
|
|
|
2024-04-11 15:51:56 +02:00
|
|
|
for _, geneName := range geneNamesList{
|
|
|
|
if (geneName == ""){
|
|
|
|
t.Fatalf("locusMetadataObjectsList contains locus with empty geneName in geneNamesList.")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for companyObject, companyAliasesList := range locusCompanyAliasesMap{
|
|
|
|
|
|
|
|
for _, locusCompanyAlias := range companyAliasesList{
|
|
|
|
|
|
|
|
companyAliasObject := companyAliasStruct{
|
|
|
|
geneticsCompany: companyObject,
|
|
|
|
locusAlias: locusCompanyAlias,
|
|
|
|
}
|
|
|
|
|
|
|
|
_, exists := companyAliasesMap[companyAliasObject]
|
|
|
|
if (exists == true){
|
|
|
|
t.Fatalf("locusMetadataObjectsList contains companyAlias collision: " + locusCompanyAlias)
|
|
|
|
}
|
|
|
|
|
|
|
|
companyAliasesMap[companyAliasObject] = struct{}{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
isValid := verifyReferencesMap(referencesMap)
|
|
|
|
if (isValid == false){
|
|
|
|
t.Fatalf("locusMetadataObjectsList contains invalid references map.")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-08-05 09:11:10 +02:00
|
|
|
//TODO: Check to make sure that there are no identical company aliases for different loci
|
|
|
|
|
2024-04-11 15:51:56 +02:00
|
|
|
missingLociList := make([]int64, 0)
|
|
|
|
|
|
|
|
for rsID, _ := range allRSIDsMap{
|
|
|
|
|
|
|
|
_, exists := locusMetadataRSIDsMap[rsID]
|
|
|
|
if (exists == false){
|
|
|
|
missingLociList = append(missingLociList, rsID)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len(missingLociList) != 0){
|
|
|
|
|
|
|
|
missingLociStringsList := make([]string, 0, len(missingLociList))
|
|
|
|
|
|
|
|
for _, rsID := range missingLociList{
|
|
|
|
|
|
|
|
rsIDString := helpers.ConvertInt64ToString(rsID)
|
|
|
|
|
|
|
|
missingLociStringsList = append(missingLociStringsList, rsIDString)
|
|
|
|
}
|
|
|
|
|
|
|
|
missingLociListFormatted := strings.Join(missingLociStringsList, ", ")
|
|
|
|
|
|
|
|
t.Fatalf("locusMetadata is missing loci: " + missingLociListFormatted)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
// We use this to determine the greatest possible number of variants tested
|
|
|
|
// This needs to be updated in profileFormat whenever a new monogenic disease is added which exceeds this value
|
|
|
|
func TestGetHighestPossibleMonogenicDiseaseVariantCount(t *testing.T){
|
|
|
|
|
|
|
|
monogenicDiseases.InitializeMonogenicDiseaseVariables()
|
|
|
|
|
|
|
|
monogenicDiseasesObjectsList, err := monogenicDiseases.GetMonogenicDiseaseObjectsList()
|
|
|
|
if (err != nil){
|
|
|
|
t.Fatalf("Failed to get monogenic disease objects list: " + err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
highestCount := 0
|
|
|
|
|
|
|
|
for _, diseaseObject := range monogenicDiseasesObjectsList{
|
|
|
|
|
|
|
|
diseaseVariantsList := diseaseObject.VariantsList
|
|
|
|
|
|
|
|
diseaseNumberOfVariants := len(diseaseVariantsList)
|
|
|
|
|
|
|
|
if (diseaseNumberOfVariants > highestCount){
|
|
|
|
highestCount = diseaseNumberOfVariants
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
highestVariantCountString := helpers.ConvertIntToString(highestCount)
|
|
|
|
|
|
|
|
log.Println("Most monogenic disease variants: " + highestVariantCountString)
|
|
|
|
}
|
|
|
|
|
|
|
|
*/
|