// locusMetadata provides information about gene locations. package locusMetadata // Locus position information should correspond to Human genome reference build 38. import "seekia/internal/helpers" import _ "embed" import "encoding/json" import "errors" //go:embed LocusMetadata_Chromosome1.json var LocusMetadataFile_Chromosome1 []byte //go:embed LocusMetadata_Chromosome2.json var LocusMetadataFile_Chromosome2 []byte //go:embed LocusMetadata_Chromosome3.json var LocusMetadataFile_Chromosome3 []byte //go:embed LocusMetadata_Chromosome4.json var LocusMetadataFile_Chromosome4 []byte //go:embed LocusMetadata_Chromosome5.json var LocusMetadataFile_Chromosome5 []byte //go:embed LocusMetadata_Chromosome6.json var LocusMetadataFile_Chromosome6 []byte //go:embed LocusMetadata_Chromosome7.json var LocusMetadataFile_Chromosome7 []byte //go:embed LocusMetadata_Chromosome8.json var LocusMetadataFile_Chromosome8 []byte //go:embed LocusMetadata_Chromosome9.json var LocusMetadataFile_Chromosome9 []byte //go:embed LocusMetadata_Chromosome10.json var LocusMetadataFile_Chromosome10 []byte //go:embed LocusMetadata_Chromosome11.json var LocusMetadataFile_Chromosome11 []byte //go:embed LocusMetadata_Chromosome12.json var LocusMetadataFile_Chromosome12 []byte //go:embed LocusMetadata_Chromosome13.json var LocusMetadataFile_Chromosome13 []byte //go:embed LocusMetadata_Chromosome14.json var LocusMetadataFile_Chromosome14 []byte //go:embed LocusMetadata_Chromosome15.json var LocusMetadataFile_Chromosome15 []byte //go:embed LocusMetadata_Chromosome16.json var LocusMetadataFile_Chromosome16 []byte //go:embed LocusMetadata_Chromosome17.json var LocusMetadataFile_Chromosome17 []byte //go:embed LocusMetadata_Chromosome19.json var LocusMetadataFile_Chromosome19 []byte //go:embed LocusMetadata_Chromosome20.json var LocusMetadataFile_Chromosome20 []byte //go:embed LocusMetadata_Chromosome21.json var LocusMetadataFile_Chromosome21 []byte //go:embed LocusMetadata_Chromosome22.json var LocusMetadataFile_Chromosome22 []byte type LocusMetadata struct{ // A list of RSIDs that refer to this location // Each RSID is equivalent and refers to the same location // rsID stands for Reference SNP cluster ID. // Each rsID is an "rs" followed by a number. // We store the number after the rs as an int64. RSIDsList []int64 // The chromosome which this location exists on Chromosome int // The position of this locus // This is a number describing its location on the chromosome it exists on. Position int // A list of gene names which refer to the gene which this locus belongs to. // Each gene name refers to the same gene. // Will be a list containing "MISSING" if the gene name has not been added yet // Will be an empty list if no gene exists GeneNamesList []string // A list of alternate names for the rsid used by companies // These are the names that the raw genome files exported from companies sometimes use instead of rsIDs // Example: TwentyThreeAndMe -> []string{"i5010839", "i5006049", "i4000295", "i5010838", "i5010837"} CompanyAliases map[GeneticsCompany][]string // Reference name -> Reference link References map[string]string } // We use this data structure to save space, rather than using String type GeneticsCompany byte const TwentyThreeAndMe GeneticsCompany = 1 const FamilyTreeDNA GeneticsCompany = 2 const MyHeritage GeneticsCompany = 3 // Map Structure: RSID -> LocusMetadata object var lociMetadataMap map[int64]LocusMetadata // This map stores a list of aliases for rsids which have aliases // An alias is a different rsid which represents the same locus var rsidAliasesMap map[int64][]int64 // We use these maps to store the locus aliases for rsIDs used by companies // Map structure: Alias -> Primary rsID (there may be aliases) // Example: "i5010839" -> 78655421 var companyAliasesMap_23andMe map[string]int64 var companyAliasesMap_FamilyTreeDNA map[string]int64 var companyAliasesMap_MyHeritage map[string]int64 func InitializeLocusMetadataVariables()error{ lociMetadataMap = make(map[int64]LocusMetadata) rsidAliasesMap = make(map[int64][]int64) companyAliasesMap_23andMe = make(map[string]int64) companyAliasesMap_FamilyTreeDNA = make(map[string]int64) companyAliasesMap_MyHeritage = make(map[string]int64) locusObjectsList, err := GetLocusMetadataObjectsList() if (err != nil) { return err } for _, locusObject := range locusObjectsList{ rsidsList := locusObject.RSIDsList for _, rsid := range rsidsList{ _, exists := lociMetadataMap[rsid] if (exists == true){ return errors.New("lociMetadataMap contains duplicate rsid.") } lociMetadataMap[rsid] = locusObject } if (len(rsidsList) > 1){ // We add rsid aliases to map for _, rsid := range rsidsList{ rsidAliasesList := make([]int64, 0) for _, rsidInner := range rsidsList{ if (rsid != rsidInner){ rsidAliasesList = append(rsidAliasesList, rsidInner) } } rsidAliasesMap[rsid] = rsidAliasesList } } companyAliasesMap := locusObject.CompanyAliases if (len(companyAliasesMap) > 0){ // Now we add company aliases to maps primaryRSID := rsidsList[0] for companyObject, companyAliasesList := range companyAliasesMap{ if (companyObject == TwentyThreeAndMe){ for _, locusAlias := range companyAliasesList{ companyAliasesMap_23andMe[locusAlias] = primaryRSID } } else if (companyObject == FamilyTreeDNA){ for _, locusAlias := range companyAliasesList{ companyAliasesMap_FamilyTreeDNA[locusAlias] = primaryRSID } } else if (companyObject == MyHeritage){ for _, locusAlias := range companyAliasesList{ companyAliasesMap_MyHeritage[locusAlias] = primaryRSID } } else { companyByteString := helpers.ConvertIntToString(int(companyObject)) return errors.New("Locus Object company aliases map contains invalid company object: " + companyByteString) } } } } return nil } //Outputs: // -bool: Locus metadata exists // -LocusMetadata // -error func GetLocusMetadata(inputRSID int64)(bool, LocusMetadata, error){ if (lociMetadataMap == nil){ return false, LocusMetadata{}, errors.New("GetLocusMetadata called when lociMetadataMap is not initialized.") } locusMetadataObject, exists := lociMetadataMap[inputRSID] if (exists == false){ return false, LocusMetadata{}, nil } return true, locusMetadataObject, nil } // This function will return a list of RSIDs which refer to the same location as the input RSID // -bool: Any Aliases exist // -[]int64: List of alias RSIDs // -error (if RSID is unknown) func GetRSIDAliases(inputRSID int64)(bool, []int64, error){ if (rsidAliasesMap == nil){ return false, nil, errors.New("rsidAliasesMap called when rsidAliasesMap is not initialized.") } aliasesList, exists := rsidAliasesMap[inputRSID] if (exists == false){ return false, nil, nil } return true, aliasesList, nil } //Outputs: // -bool: Alias found // -int64: Primary rsID alias to use to represent this locus // -error func GetCompanyAliasRSID(companyName string, locusAlias string)(bool, int64, error){ if (companyName == "23andMe"){ locusRSID, exists := companyAliasesMap_23andMe[locusAlias] if (exists == false){ return false, 0, nil } return true, locusRSID, nil } else if (companyName == "FamilyTreeDNA"){ locusRSID, exists := companyAliasesMap_FamilyTreeDNA[locusAlias] if (exists == false){ return false, 0, nil } return true, locusRSID, nil } else if (companyName == "MyHeritage"){ locusRSID, exists := companyAliasesMap_MyHeritage[locusAlias] if (exists == false){ return false, 0, nil } return true, locusRSID, nil } return false, 0, errors.New("GetCompanyAliasRSID called with invalid companyName: " + companyName) } // This function is only public for use in testing func GetLocusMetadataObjectsList()([]LocusMetadata, error){ chromosomesList := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22} locusMetadataObjectsList := make([]LocusMetadata, 0, len(chromosomesList)) for _, chromosomesInt := range chromosomesList{ chromosomeLocusMetadataObjectsList, err := GetLocusMetadataObjectsListByChromosome(chromosomesInt) if (err != nil){ return nil, err } locusMetadataObjectsList = append(locusMetadataObjectsList, chromosomeLocusMetadataObjectsList...) } return locusMetadataObjectsList, nil } func GetLocusMetadataObjectsListByChromosome(chromosome int)([]LocusMetadata, error){ if (chromosome < 1 || chromosome > 22){ chromosomeString := helpers.ConvertIntToString(chromosome) return nil, errors.New("GetLocusMetadataObjectsListByChromosome called with invalid chromosome: " + chromosomeString) } // Outputs: // -bool: File exists // -[]byte: File bytes getFileBytes := func()(bool, []byte){ switch chromosome{ case 1:{ return true, LocusMetadataFile_Chromosome1 } case 2:{ return true, LocusMetadataFile_Chromosome2 } case 3:{ return true, LocusMetadataFile_Chromosome3 } case 4:{ return true, LocusMetadataFile_Chromosome4 } case 5:{ return true, LocusMetadataFile_Chromosome5 } case 6:{ return true, LocusMetadataFile_Chromosome6 } case 7:{ return true, LocusMetadataFile_Chromosome7 } case 8:{ return true, LocusMetadataFile_Chromosome8 } case 9:{ return true, LocusMetadataFile_Chromosome9 } case 10:{ return true, LocusMetadataFile_Chromosome10 } case 11:{ return true, LocusMetadataFile_Chromosome11 } case 12:{ return true, LocusMetadataFile_Chromosome12 } case 13:{ return true, LocusMetadataFile_Chromosome13 } case 14:{ return true, LocusMetadataFile_Chromosome14 } case 15:{ return true, LocusMetadataFile_Chromosome15 } case 16:{ return true, LocusMetadataFile_Chromosome16 } case 17:{ return true, LocusMetadataFile_Chromosome17 } //case 18:{ // return true, LocusMetadataFile_Chromosome18 //} case 19:{ return true, LocusMetadataFile_Chromosome19 } case 20:{ return true, LocusMetadataFile_Chromosome20 } case 21:{ return true, LocusMetadataFile_Chromosome21 } case 22:{ return true, LocusMetadataFile_Chromosome22 } } return false, nil } fileExists, fileBytes := getFileBytes() if (fileExists == false){ // No loci exist for this chromosome emptyList := make([]LocusMetadata, 0) return emptyList, nil } var locusMetadataObjectsList []LocusMetadata err := json.Unmarshal(fileBytes, &locusMetadataObjectsList) if (err != nil) { return nil, err } return locusMetadataObjectsList, nil }