seekia/resources/geneticReferences/locusMetadata/locusMetadata.go

252 lines
6.5 KiB
Go
Raw Normal View History

// Package locusMetadata provides information about locations in the human genome.
package locusMetadata
// Locus position information should correspond to Human genome reference build 38.
import "seekia/internal/helpers"
import _ "embed"
import "encoding/gob"
import "errors"
import "bytes"
// LocusMetadataFile holds the raw bytes of the LocusMetadata.gob file, which is embedded at build time.
// GetLocusMetadataObjectsList decodes these bytes with encoding/gob into a list of LocusMetadata objects.
//
//go:embed LocusMetadata.gob
var LocusMetadataFile []byte
// LocusMetadata describes a single location (locus) in the human genome.
type LocusMetadata struct {
	// Every rsID which identifies this locus.
	// The entries are equivalent: each one refers to the same location.
	// An rsID (Reference SNP cluster ID) is written as "rs" followed by a number.
	// Only the number after the "rs" is stored, as an int64.
	RSIDsList []int64

	// The chromosome on which this locus exists.
	Chromosome int

	// The position of this locus.
	// This number describes where the locus is located on its chromosome.
	Position int

	// True if we know any information about the gene this locus belongs to,
	// including whether a gene exists at all.
	GeneInfoIsKnown bool

	// True if the locus exists within a gene.
	// Some loci are non-coding, meaning they are not inside a gene and do not code for a protein.
	GeneExists bool

	// The names of the gene which this locus belongs to.
	// Every name in the list refers to the same gene.
	// Will be a nil list if gene info is not known, or if no gene exists.
	GeneNamesList []string

	// Alternate names which companies use for this locus in place of an rsID.
	// Raw genome files exported from companies sometimes contain these names instead of rsIDs.
	// Example: TwentyThreeAndMe -> []string{"i5010839", "i5006049", "i4000295", "i5010838", "i5010837"}
	CompanyAliases map[GeneticsCompany][]string

	// Map Structure: Reference name -> Reference link
	References map[string]string
}
// GeneticsCompany identifies a genetics company.
// We use a single byte to save space, rather than using a string.
type GeneticsCompany byte

// The companies which can be used as keys in LocusMetadata.CompanyAliases.
// NOTE(review): these values are presumably stored inside the embedded gob data,
// so changing them likely requires regenerating that file - confirm.
const (
	TwentyThreeAndMe GeneticsCompany = 1
	FamilyTreeDNA    GeneticsCompany = 2
	MyHeritage       GeneticsCompany = 3
)
// The lookup maps below are nil until InitializeLocusMetadataVariables is called.
var (
	// Map Structure: rsID -> Locus Metadata Object
	// Every rsID of a locus has its own entry.
	lociMetadataMap map[int64]LocusMetadata

	// Map Structure: rsID -> List of alias rsIDs
	// An alias is a different rsID which represents the same locus.
	// Only rsIDs which have at least one alias have an entry.
	rsidAliasesMap map[int64][]int64

	// These maps store the locus aliases which companies use instead of rsIDs.
	// Map Structure: Company alias -> Primary rsID (the locus may have other rsIDs)
	// Example: "i5010839" -> 78655421
	companyAliasesMap_23andMe       map[string]int64
	companyAliasesMap_FamilyTreeDNA map[string]int64
	companyAliasesMap_MyHeritage    map[string]int64
)
// InitializeLocusMetadataVariables builds the package's lookup maps from the embedded locus metadata.
// GetLocusMetadata and GetRSIDAliases return an error until this function has been called.
// Every call discards the existing maps and rebuilds them from scratch.
// If an error is returned, the maps may be only partially populated.
//
// An error is returned if:
//   - The embedded locus metadata cannot be decoded
//   - The same rsID appears more than once
//   - A locus has company aliases but no rsIDs
//   - A locus has company aliases for an unknown company
func InitializeLocusMetadataVariables() error {

	lociMetadataMap = make(map[int64]LocusMetadata)
	rsidAliasesMap = make(map[int64][]int64)

	companyAliasesMap_23andMe = make(map[string]int64)
	companyAliasesMap_FamilyTreeDNA = make(map[string]int64)
	companyAliasesMap_MyHeritage = make(map[string]int64)

	locusObjectsList, err := GetLocusMetadataObjectsList()
	if err != nil {
		return err
	}

	for _, locusObject := range locusObjectsList {

		rsidsList := locusObject.RSIDsList

		// Every rsID of the locus maps to the same metadata object
		for _, rsID := range rsidsList {

			_, exists := lociMetadataMap[rsID]
			if exists {
				return errors.New("lociMetadataMap contains duplicate rsID.")
			}

			lociMetadataMap[rsID] = locusObject
		}

		if len(rsidsList) > 1 {

			// We add rsid aliases to map
			// The aliases of an rsID are all of the other rsIDs of the same locus

			for _, rsID := range rsidsList {

				rsidAliasesList := make([]int64, 0, len(rsidsList)-1)

				for _, rsidInner := range rsidsList {
					if rsID != rsidInner {
						rsidAliasesList = append(rsidAliasesList, rsidInner)
					}
				}

				rsidAliasesMap[rsID] = rsidAliasesList
			}
		}

		companyAliasesMap := locusObject.CompanyAliases
		if len(companyAliasesMap) == 0 {
			continue
		}

		// Now we add company aliases to maps
		// Each alias points to the first rsID of the locus, so the locus must have at least one rsID
		// Without this check, indexing the empty list below would panic

		if len(rsidsList) == 0 {
			return errors.New("Locus Object contains company aliases but no rsIDs.")
		}

		primaryRSID := rsidsList[0]

		for companyObject, companyAliasesList := range companyAliasesMap {

			var destinationMap map[string]int64

			switch companyObject {
			case TwentyThreeAndMe:
				destinationMap = companyAliasesMap_23andMe
			case FamilyTreeDNA:
				destinationMap = companyAliasesMap_FamilyTreeDNA
			case MyHeritage:
				destinationMap = companyAliasesMap_MyHeritage
			default:
				companyByteString := helpers.ConvertIntToString(int(companyObject))
				return errors.New("Locus Object company aliases map contains invalid company object: " + companyByteString)
			}

			for _, locusAlias := range companyAliasesList {
				destinationMap[locusAlias] = primaryRSID
			}
		}
	}

	return nil
}
// GetLocusMetadata looks up the metadata of the locus which the input rsID refers to.
// An error is returned if lociMetadataMap has not been initialized.
// An unknown rsID is not an error: the first output is false instead.
//Outputs:
//	-bool: Locus metadata exists
//	-LocusMetadata
//	-error
func GetLocusMetadata(inputRSID int64) (bool, LocusMetadata, error) {

	if lociMetadataMap == nil {
		return false, LocusMetadata{}, errors.New("GetLocusMetadata called when lociMetadataMap is not initialized.")
	}

	if metadata, found := lociMetadataMap[inputRSID]; found {
		return true, metadata, nil
	}

	return false, LocusMetadata{}, nil
}
// GetRSIDAliases returns the list of other rsIDs which refer to the same location as the input rsID.
// An error is returned only if rsidAliasesMap has not been initialized.
// An rsID which is unknown, or which has no aliases, is not an error: the first output is false instead.
// The returned list is the one stored in rsidAliasesMap, so callers must not modify it.
//Outputs:
//	-bool: Any aliases exist
//	-[]int64: List of alias rsIDs (nil if none exist)
//	-error
func GetRSIDAliases(inputRSID int64) (bool, []int64, error) {

	if rsidAliasesMap == nil {
		return false, nil, errors.New("GetRSIDAliases called when rsidAliasesMap is not initialized.")
	}

	aliasesList, exists := rsidAliasesMap[inputRSID]
	if !exists {
		return false, nil, nil
	}

	return true, aliasesList, nil
}
// GetCompanyAliasRSID converts a locus alias used by a company into the primary rsID of the locus.
// companyName must be "23andMe", "FamilyTreeDNA", or "MyHeritage". Any other name returns an error.
// An error is also returned if the company aliases maps have not been initialized.
// An unknown alias is not an error: the first output is false instead.
//Outputs:
//	-bool: Alias found
//	-int64: Primary rsID alias to use to represent this locus
//	-error
func GetCompanyAliasRSID(companyName string, locusAlias string) (bool, int64, error) {

	var companyAliasesMap map[string]int64

	switch companyName {
	case "23andMe":
		companyAliasesMap = companyAliasesMap_23andMe
	case "FamilyTreeDNA":
		companyAliasesMap = companyAliasesMap_FamilyTreeDNA
	case "MyHeritage":
		companyAliasesMap = companyAliasesMap_MyHeritage
	default:
		return false, 0, errors.New("GetCompanyAliasRSID called with invalid companyName: " + companyName)
	}

	// A lookup in a nil map reports that the alias does not exist.
	// We return an error instead, so an uninitialized package is not mistaken for an unknown alias.
	if companyAliasesMap == nil {
		return false, 0, errors.New("GetCompanyAliasRSID called when company aliases maps are not initialized.")
	}

	locusRSID, exists := companyAliasesMap[locusAlias]
	if !exists {
		return false, 0, nil
	}

	return true, locusRSID, nil
}
// GetLocusMetadataObjectsList decodes the gob-encoded bytes of LocusMetadataFile into a list of LocusMetadata objects.
// If decoding fails, a nil list and the decoding error are returned.
func GetLocusMetadataObjectsList() ([]LocusMetadata, error) {

	var decodedObjectsList []LocusMetadata

	fileReader := bytes.NewReader(LocusMetadataFile)

	err := gob.NewDecoder(fileReader).Decode(&decodedObjectsList)
	if err != nil {
		return nil, err
	}

	return decodedObjectsList, nil
}