// locusMetadata provides information about locations in the human genome. package locusMetadata // Locus position information should correspond to Human genome reference build 38. import "seekia/internal/helpers" import _ "embed" import "encoding/gob" import "errors" import "bytes" //go:embed LocusMetadata.gob var LocusMetadataFile []byte type LocusMetadata struct{ // A list of RSIDs that refer to this location // Each RSID is equivalent and refers to the same location // rsID stands for Reference SNP cluster ID. // Each rsID is an "rs" followed by a number. // We store the number after the rs as an int64. RSIDsList []int64 // The chromosome which this location exists on Chromosome int // The position of this locus // This is a number describing its location on the chromosome it exists on. Position int // This is true if we know any information about the gene this locus belongs to, and if there even is a gene GeneInfoIsKnown bool // This is true if the locus exists within a gene // Some loci are non-coding, meaning they don't exist within a gene and code for a protein GeneExists bool // A list of gene names which refer to the gene which this locus belongs to. // Each gene name refers to the same gene. // Will be a nil list if gene info is not known, or no gene exists GeneNamesList []string // A list of alternate names for the rsid used by companies // These are the names that the raw genome files exported from companies sometimes use instead of rsIDs // Example: TwentyThreeAndMe -> []string{"i5010839", "i5006049", "i4000295", "i5010838", "i5010837"} CompanyAliases map[GeneticsCompany][]string // Reference name -> Reference link References map[string]string } // We use this data structure to save space, rather than using String type GeneticsCompany byte const TwentyThreeAndMe GeneticsCompany = 1 const FamilyTreeDNA GeneticsCompany = 2 const MyHeritage GeneticsCompany = 3 // Map Structure: RSID -> Locus Metadata Object var lociMetadataMap map[int64]LocusMetadata // This map stores a list of aliases for rsids which have aliases // An alias is a different rsid which represents the same locus var rsidAliasesMap map[int64][]int64 // We use these maps to store the locus aliases for rsIDs used by companies // Map structure: Alias -> Primary rsID (there may be aliases) // Example: "i5010839" -> 78655421 var companyAliasesMap_23andMe map[string]int64 var companyAliasesMap_FamilyTreeDNA map[string]int64 var companyAliasesMap_MyHeritage map[string]int64 func InitializeLocusMetadataVariables()error{ lociMetadataMap = make(map[int64]LocusMetadata) rsidAliasesMap = make(map[int64][]int64) companyAliasesMap_23andMe = make(map[string]int64) companyAliasesMap_FamilyTreeDNA = make(map[string]int64) companyAliasesMap_MyHeritage = make(map[string]int64) locusObjectsList, err := GetLocusMetadataObjectsList() if (err != nil) { return err } for _, locusObject := range locusObjectsList{ rsidsList := locusObject.RSIDsList for _, rsID := range rsidsList{ _, exists := lociMetadataMap[rsID] if (exists == true){ return errors.New("lociMetadataMap contains duplicate rsID.") } lociMetadataMap[rsID] = locusObject } if (len(rsidsList) > 1){ // We add rsid aliases to map for _, rsID := range rsidsList{ rsidAliasesList := make([]int64, 0) for _, rsidInner := range rsidsList{ if (rsID != rsidInner){ rsidAliasesList = append(rsidAliasesList, rsidInner) } } rsidAliasesMap[rsID] = rsidAliasesList } } companyAliasesMap := locusObject.CompanyAliases if (len(companyAliasesMap) > 0){ // Now we add company aliases to maps primaryRSID := rsidsList[0] for companyObject, companyAliasesList := range companyAliasesMap{ if (companyObject == TwentyThreeAndMe){ for _, locusAlias := range companyAliasesList{ companyAliasesMap_23andMe[locusAlias] = primaryRSID } } else if (companyObject == FamilyTreeDNA){ for _, locusAlias := range companyAliasesList{ companyAliasesMap_FamilyTreeDNA[locusAlias] = primaryRSID } } else if (companyObject == MyHeritage){ for _, locusAlias := range companyAliasesList{ companyAliasesMap_MyHeritage[locusAlias] = primaryRSID } } else { companyByteString := helpers.ConvertIntToString(int(companyObject)) return errors.New("Locus Object company aliases map contains invalid company object: " + companyByteString) } } } } return nil } //Outputs: // -bool: Locus metadata exists // -LocusMetadata // -error func GetLocusMetadata(inputRSID int64)(bool, LocusMetadata, error){ if (lociMetadataMap == nil){ return false, LocusMetadata{}, errors.New("GetLocusMetadata called when lociMetadataMap is not initialized.") } locusMetadataObject, exists := lociMetadataMap[inputRSID] if (exists == false){ return false, LocusMetadata{}, nil } return true, locusMetadataObject, nil } // This function will return a list of RSIDs which refer to the same location as the input RSID // -bool: Any Aliases exist // -[]int64: List of alias RSIDs // -error (if RSID is unknown) func GetRSIDAliases(inputRSID int64)(bool, []int64, error){ if (rsidAliasesMap == nil){ return false, nil, errors.New("rsidAliasesMap called when rsidAliasesMap is not initialized.") } aliasesList, exists := rsidAliasesMap[inputRSID] if (exists == false){ return false, nil, nil } return true, aliasesList, nil } //Outputs: // -bool: Alias found // -int64: Primary rsID alias to use to represent this locus // -error func GetCompanyAliasRSID(companyName string, locusAlias string)(bool, int64, error){ if (companyName == "23andMe"){ locusRSID, exists := companyAliasesMap_23andMe[locusAlias] if (exists == false){ return false, 0, nil } return true, locusRSID, nil } else if (companyName == "FamilyTreeDNA"){ locusRSID, exists := companyAliasesMap_FamilyTreeDNA[locusAlias] if (exists == false){ return false, 0, nil } return true, locusRSID, nil } else if (companyName == "MyHeritage"){ locusRSID, exists := companyAliasesMap_MyHeritage[locusAlias] if (exists == false){ return false, 0, nil } return true, locusRSID, nil } return false, 0, errors.New("GetCompanyAliasRSID called with invalid companyName: " + companyName) } func GetLocusMetadataObjectsList()([]LocusMetadata, error){ buffer := bytes.NewBuffer(LocusMetadataFile) decoder := gob.NewDecoder(buffer) var locusMetadataObjectsList []LocusMetadata err := decoder.Decode(&locusMetadataObjectsList) if (err != nil){ return nil, err } return locusMetadataObjectsList, nil }