// Package locusMetadata provides information about locations in the human genome.
package locusMetadata
|
|
|
|
// Locus position information should correspond to Human genome reference build 38.
|
|
|
|
import "seekia/internal/helpers"
|
|
|
|
import _ "embed"
|
|
|
|
import "encoding/gob"
|
|
import "errors"
|
|
import "bytes"
|
|
|
|
|
|
//go:embed LocusMetadata.gob
|
|
var LocusMetadataFile []byte
|
|
|
|
|
|
type LocusMetadata struct{
|
|
|
|
// A list of RSIDs that refer to this location
|
|
// Each RSID is equivalent and refers to the same location
|
|
// rsID stands for Reference SNP cluster ID.
|
|
// Each rsID is an "rs" followed by a number.
|
|
// We store the number after the rs as an int64.
|
|
RSIDsList []int64
|
|
|
|
// The chromosome which this location exists on
|
|
Chromosome int
|
|
|
|
// The position of this locus
|
|
// This is a number describing its location on the chromosome it exists on.
|
|
Position int
|
|
|
|
// This is true if we know any information about the gene this locus belongs to, and if there even is a gene
|
|
GeneInfoIsKnown bool
|
|
|
|
// This is true if the locus exists within a gene
|
|
// Some loci are non-coding, meaning they don't exist within a gene and code for a protein
|
|
GeneExists bool
|
|
|
|
// A list of gene names which refer to the gene which this locus belongs to.
|
|
// Each gene name refers to the same gene.
|
|
// Will be a nil list if gene info is not known, or no gene exists
|
|
GeneNamesList []string
|
|
|
|
// A list of alternate names for the rsid used by companies
|
|
// These are the names that the raw genome files exported from companies sometimes use instead of rsIDs
|
|
// Example: TwentyThreeAndMe -> []string{"i5010839", "i5006049", "i4000295", "i5010838", "i5010837"}
|
|
CompanyAliases map[GeneticsCompany][]string
|
|
|
|
// Reference name -> Reference link
|
|
References map[string]string
|
|
}
|
|
|
|
// GeneticsCompany identifies a genetics company.
// A single byte is used, rather than a string, in order to save space.
type GeneticsCompany byte

// The companies which a GeneticsCompany value can identify.
const (
	TwentyThreeAndMe GeneticsCompany = 1
	FamilyTreeDNA    GeneticsCompany = 2
	MyHeritage       GeneticsCompany = 3
)
|
|
|
|
// Map Structure: RSID -> Locus Metadata Object
|
|
var lociMetadataMap map[int64]LocusMetadata
|
|
|
|
// This map stores a list of aliases for rsids which have aliases
|
|
// An alias is a different rsid which represents the same locus
|
|
var rsidAliasesMap map[int64][]int64
|
|
|
|
// We use these maps to store the locus aliases for rsIDs used by companies
|
|
// Map structure: Alias -> Primary rsID (there may be aliases)
|
|
// Example: "i5010839" -> 78655421
|
|
var companyAliasesMap_23andMe map[string]int64
|
|
var companyAliasesMap_FamilyTreeDNA map[string]int64
|
|
var companyAliasesMap_MyHeritage map[string]int64
|
|
|
|
|
|
func InitializeLocusMetadataVariables()error{
|
|
|
|
lociMetadataMap = make(map[int64]LocusMetadata)
|
|
rsidAliasesMap = make(map[int64][]int64)
|
|
|
|
companyAliasesMap_23andMe = make(map[string]int64)
|
|
companyAliasesMap_FamilyTreeDNA = make(map[string]int64)
|
|
companyAliasesMap_MyHeritage = make(map[string]int64)
|
|
|
|
locusObjectsList, err := GetLocusMetadataObjectsList()
|
|
if (err != nil) { return err }
|
|
|
|
for _, locusObject := range locusObjectsList{
|
|
|
|
rsidsList := locusObject.RSIDsList
|
|
|
|
for _, rsID := range rsidsList{
|
|
|
|
_, exists := lociMetadataMap[rsID]
|
|
if (exists == true){
|
|
return errors.New("lociMetadataMap contains duplicate rsID.")
|
|
}
|
|
|
|
lociMetadataMap[rsID] = locusObject
|
|
}
|
|
|
|
if (len(rsidsList) > 1){
|
|
|
|
// We add rsid aliases to map
|
|
|
|
for _, rsID := range rsidsList{
|
|
|
|
rsidAliasesList := make([]int64, 0)
|
|
|
|
for _, rsidInner := range rsidsList{
|
|
|
|
if (rsID != rsidInner){
|
|
rsidAliasesList = append(rsidAliasesList, rsidInner)
|
|
}
|
|
}
|
|
|
|
rsidAliasesMap[rsID] = rsidAliasesList
|
|
}
|
|
}
|
|
|
|
companyAliasesMap := locusObject.CompanyAliases
|
|
|
|
if (len(companyAliasesMap) > 0){
|
|
|
|
// Now we add company aliases to maps
|
|
|
|
primaryRSID := rsidsList[0]
|
|
|
|
for companyObject, companyAliasesList := range companyAliasesMap{
|
|
|
|
if (companyObject == TwentyThreeAndMe){
|
|
|
|
for _, locusAlias := range companyAliasesList{
|
|
companyAliasesMap_23andMe[locusAlias] = primaryRSID
|
|
}
|
|
|
|
} else if (companyObject == FamilyTreeDNA){
|
|
|
|
for _, locusAlias := range companyAliasesList{
|
|
companyAliasesMap_FamilyTreeDNA[locusAlias] = primaryRSID
|
|
}
|
|
|
|
} else if (companyObject == MyHeritage){
|
|
|
|
for _, locusAlias := range companyAliasesList{
|
|
companyAliasesMap_MyHeritage[locusAlias] = primaryRSID
|
|
}
|
|
|
|
} else {
|
|
companyByteString := helpers.ConvertIntToString(int(companyObject))
|
|
return errors.New("Locus Object company aliases map contains invalid company object: " + companyByteString)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
//Outputs:
|
|
// -bool: Locus metadata exists
|
|
// -LocusMetadata
|
|
// -error
|
|
func GetLocusMetadata(inputRSID int64)(bool, LocusMetadata, error){
|
|
|
|
if (lociMetadataMap == nil){
|
|
return false, LocusMetadata{}, errors.New("GetLocusMetadata called when lociMetadataMap is not initialized.")
|
|
}
|
|
|
|
locusMetadataObject, exists := lociMetadataMap[inputRSID]
|
|
if (exists == false){
|
|
return false, LocusMetadata{}, nil
|
|
}
|
|
|
|
return true, locusMetadataObject, nil
|
|
}
|
|
|
|
// This function will return a list of RSIDs which refer to the same location as the input RSID
|
|
// -bool: Any Aliases exist
|
|
// -[]int64: List of alias RSIDs
|
|
// -error (if RSID is unknown)
|
|
func GetRSIDAliases(inputRSID int64)(bool, []int64, error){
|
|
|
|
if (rsidAliasesMap == nil){
|
|
return false, nil, errors.New("rsidAliasesMap called when rsidAliasesMap is not initialized.")
|
|
}
|
|
|
|
aliasesList, exists := rsidAliasesMap[inputRSID]
|
|
if (exists == false){
|
|
return false, nil, nil
|
|
}
|
|
|
|
return true, aliasesList, nil
|
|
}
|
|
|
|
|
|
//Outputs:
|
|
// -bool: Alias found
|
|
// -int64: Primary rsID alias to use to represent this locus
|
|
// -error
|
|
func GetCompanyAliasRSID(companyName string, locusAlias string)(bool, int64, error){
|
|
|
|
if (companyName == "23andMe"){
|
|
|
|
locusRSID, exists := companyAliasesMap_23andMe[locusAlias]
|
|
if (exists == false){
|
|
return false, 0, nil
|
|
}
|
|
|
|
return true, locusRSID, nil
|
|
|
|
} else if (companyName == "FamilyTreeDNA"){
|
|
|
|
locusRSID, exists := companyAliasesMap_FamilyTreeDNA[locusAlias]
|
|
if (exists == false){
|
|
return false, 0, nil
|
|
}
|
|
|
|
return true, locusRSID, nil
|
|
|
|
} else if (companyName == "MyHeritage"){
|
|
|
|
locusRSID, exists := companyAliasesMap_MyHeritage[locusAlias]
|
|
if (exists == false){
|
|
return false, 0, nil
|
|
}
|
|
|
|
return true, locusRSID, nil
|
|
}
|
|
|
|
return false, 0, errors.New("GetCompanyAliasRSID called with invalid companyName: " + companyName)
|
|
}
|
|
|
|
func GetLocusMetadataObjectsList()([]LocusMetadata, error){
|
|
|
|
buffer := bytes.NewBuffer(LocusMetadataFile)
|
|
|
|
decoder := gob.NewDecoder(buffer)
|
|
|
|
var locusMetadataObjectsList []LocusMetadata
|
|
|
|
err := decoder.Decode(&locusMetadataObjectsList)
|
|
if (err != nil){ return nil, err }
|
|
|
|
return locusMetadataObjectsList, nil
|
|
}
|
|
|