1120 lines
33 KiB
Go
1120 lines
33 KiB
Go
package companyAnalysis
|
|
|
|
import "seekia/internal/allowedText"
|
|
import "seekia/internal/helpers"
|
|
|
|
import "slices"
|
|
import "strings"
|
|
import "errors"
|
|
import "maps"
|
|
|
|
|
|
// VerifyNeanderthalVariants_23andMe reports whether the provided Neanderthal variants count is acceptable.
// A count is accepted when it lies within the inclusive range 0 to 7462.
func VerifyNeanderthalVariants_23andMe(numberOfVariants int) bool {

	// Largest count this function accepts
	const maximumVariants = 7462

	return numberOfVariants >= 0 && numberOfVariants <= maximumVariants
}
|
|
|
|
// These are the maternal and paternal haplogroups we know
|
|
// There are more that should be documented
|
|
// If a user submits one of these, the attribute will be canonical
|
|
// Otherwise, moderators must approve it
|
|
// TODO: Someone needs to figure out all of 23andMe's haplogroups.
|
|
|
|
// GetKnownMaternalHaplogroupsList_23andMe returns the maternal haplogroups that are currently documented.
// A copy is returned, so callers may sort or edit the result without changing the package's list.
func GetKnownMaternalHaplogroupsList_23andMe() []string {

	return slices.Clone(knownMaternalHaplogroupsList)
}

// GetKnownPaternalHaplogroupsList_23andMe returns the paternal haplogroups that are currently documented.
// A copy is returned, so callers may sort or edit the result without changing the package's list.
func GetKnownPaternalHaplogroupsList_23andMe() []string {

	return slices.Clone(knownPaternalHaplogroupsList)
}

// Maternal haplogroups which are treated as known
// This list is not exhaustive
var knownMaternalHaplogroupsList = []string{
	"H3",
	"U4a1a",
	"J1",
	"U9a",
	"H1a",
	"U5b1b1a1",
}

// Paternal haplogroups which are treated as known
// This list is not exhaustive
var knownPaternalHaplogroupsList = []string{
	"R-L1335",
	"E-P147",
	"E-L29",
	"T-CTS8512",
	"R-P311",
}
|
|
|
|
//Outputs:
|
|
// -bool: Is Valid
|
|
// -bool: Is Canonical
|
|
func VerifyMaternalHaplogroup_23AndMe(inputValue string)(bool, bool){
|
|
|
|
isKnown := slices.Contains(knownMaternalHaplogroupsList, inputValue)
|
|
if (isKnown == true){
|
|
return true, true
|
|
}
|
|
|
|
if (len(inputValue) > 25){
|
|
return false, false
|
|
}
|
|
|
|
isAllowed := allowedText.VerifyStringIsAllowed(inputValue)
|
|
if (isAllowed == false){
|
|
return false, false
|
|
}
|
|
|
|
containsTabsOrNewlines := helpers.CheckIfStringContainsTabsOrNewlines(inputValue)
|
|
if (containsTabsOrNewlines == true){
|
|
return false, false
|
|
}
|
|
|
|
return true, true
|
|
}
|
|
|
|
//Outputs:
|
|
// -bool: Is Valid
|
|
// -bool: Is Canonical
|
|
func VerifyPaternalHaplogroup_23AndMe(inputValue string)(bool, bool){
|
|
|
|
isKnown := slices.Contains(knownPaternalHaplogroupsList, inputValue)
|
|
if (isKnown == true){
|
|
return true, true
|
|
}
|
|
|
|
if (len(inputValue) > 25){
|
|
return false, false
|
|
}
|
|
|
|
isAllowed := allowedText.VerifyStringIsAllowed(inputValue)
|
|
if (isAllowed == false){
|
|
return false, false
|
|
}
|
|
|
|
containsTabsOrNewlines := helpers.CheckIfStringContainsTabsOrNewlines(inputValue)
|
|
if (containsTabsOrNewlines == true){
|
|
return false, false
|
|
}
|
|
|
|
return true, true
|
|
}
|
|
|
|
//TODO: Replace float64 with float32 everywhere where ancestry composition percentage maps exist?
|
|
// We don't need more than 1 decimal of precision, and values only span 0-100
|
|
|
|
//Outputs:
|
|
// -bool: Inputs are valid (all locations are valid and sum to the correct amounts)
|
|
// -string: Composition attribute
|
|
// -error: There is a bug in the function
|
|
func CreateAncestryCompositionAttribute_23andMe(continentPercentagesMap map[string]float64, regionPercentagesMap map[string]float64, subregionPercentagesMap map[string]float64)(bool, string, error){
|
|
|
|
// We will round down all percentages to 1 decimal, because that is the highest precision that 23andMe offers
|
|
|
|
getLocationSection := func(locationPercentagesMap map[string]float64)string{
|
|
|
|
if (len(locationPercentagesMap) == 0){
|
|
return "None"
|
|
}
|
|
|
|
locationItemsList := make([]string, 0)
|
|
|
|
for locationName, locationPercentage := range locationPercentagesMap{
|
|
|
|
if (locationPercentage == 0){
|
|
continue
|
|
}
|
|
|
|
percentageString := helpers.ConvertFloat64ToStringRounded(locationPercentage, 1)
|
|
|
|
locationItem := locationName + "$" + percentageString
|
|
|
|
locationItemsList = append(locationItemsList, locationItem)
|
|
}
|
|
|
|
if (len(locationItemsList) == 0){
|
|
return "None"
|
|
}
|
|
|
|
locationSection := strings.Join(locationItemsList, "#")
|
|
|
|
return locationSection
|
|
}
|
|
|
|
continentsSection := getLocationSection(continentPercentagesMap)
|
|
regionsSection := getLocationSection(regionPercentagesMap)
|
|
subregionsSection := getLocationSection(subregionPercentagesMap)
|
|
|
|
compositionAttribute := continentsSection + "+" + regionsSection + "+" + subregionsSection
|
|
|
|
attributeIsValid, _, _, _, err := ReadAncestryCompositionAttribute_23andMe(true, compositionAttribute)
|
|
if (err != nil){ return false, "", err }
|
|
if (attributeIsValid == false){
|
|
return false, "", nil
|
|
}
|
|
|
|
return true, compositionAttribute, nil
|
|
}
|
|
|
|
// This function's maps only contains locations which have no sublocations
|
|
//Outputs:
|
|
// -bool: Attribute is valid
|
|
// -map[string]float64: Continent percentages map (continent name -> Percentage of total ancestry)
|
|
// -map[string]float64: Region percentages map (region name -> Percentage of total ancestry)
|
|
// -map[string]float64: Subregion percentages map (Subregion name -> Percentage of total ancestry)
|
|
// -error (if there is a bug in this function)
|
|
func ReadAncestryCompositionAttribute_23andMe(verifyData bool, attributeValue string)(bool, map[string]float64, map[string]float64, map[string]float64, error){
|
|
|
|
// Attribute is formatted as follows:
|
|
|
|
// Continents section + "+" + Regions Section + "+" + Sub-Regions section
|
|
|
|
// Continents section is made up of continent items, or "None"
|
|
// Region section is made up of region items, or "None"
|
|
// Subregion section is made up of subregion items, or "None"
|
|
|
|
// Each section's items are separated by "#"
|
|
|
|
// Continent Item: Continent name + "$" + Percentage of whole
|
|
// Region Item: Region name + "$" + Percentage of whole
|
|
// Subregion Item: Subregion name + "$" + Percentage of whole
|
|
|
|
// Only locations with no sub-locations are included
|
|
// Example: Unassigned has no sub-locations, so it is a valid continent to include
|
|
// Thus, all percentage values for all continent, region and subregion locations must sum to 100
|
|
|
|
attributeList := strings.Split(attributeValue, "+")
|
|
|
|
if (len(attributeList) != 3){
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
|
|
//Map Structure: Continent name -> Percentage of total ancestry
|
|
continentPercentagesMap := make(map[string]float64)
|
|
|
|
//Map Structure: Region name -> Percentage of total ancestry
|
|
regionPercentagesMap := make(map[string]float64)
|
|
|
|
//Map Structure: Subregion name -> Percentage of total ancestry
|
|
subregionPercentagesMap := make(map[string]float64)
|
|
|
|
// Outputs:
|
|
// -error: If attribute is invalid
|
|
addLocationItemsToMap := func(locationSection string, locationMap map[string]float64)error{
|
|
|
|
if (locationSection == "None"){
|
|
return nil
|
|
}
|
|
|
|
locationItemsList := strings.Split(locationSection, "#")
|
|
|
|
for _, locationItem := range locationItemsList{
|
|
|
|
locationName, locationPercentage, delimiterFound := strings.Cut(locationItem, "$")
|
|
if (delimiterFound == false){
|
|
return errors.New("Attribute is invalid: contains invalid locationItem")
|
|
}
|
|
|
|
locationPercentageFloat64, err := helpers.ConvertStringToFloat64(locationPercentage)
|
|
if (err != nil){
|
|
return errors.New("Attribute is invalid: location percentage is not float.")
|
|
}
|
|
|
|
if (locationPercentageFloat64 <= 0 || locationPercentageFloat64 > 100){
|
|
return errors.New("Attribute is invalid: location percentage is out of range.")
|
|
}
|
|
|
|
_, exists := locationMap[locationName]
|
|
if (exists == true){
|
|
return errors.New("Attribute is invalid: Contains duplicate locationName")
|
|
}
|
|
|
|
locationMap[locationName] = locationPercentageFloat64
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
continentsSection := attributeList[0]
|
|
regionsSection := attributeList[1]
|
|
subregionsSection := attributeList[2]
|
|
|
|
err := addLocationItemsToMap(continentsSection, continentPercentagesMap)
|
|
if (err != nil){
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
|
|
err = addLocationItemsToMap(regionsSection, regionPercentagesMap)
|
|
if (err != nil){
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
|
|
err = addLocationItemsToMap(subregionsSection, subregionPercentagesMap)
|
|
if (err != nil){
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
|
|
if (verifyData == false){
|
|
return true, continentPercentagesMap, regionPercentagesMap, subregionPercentagesMap, nil
|
|
}
|
|
|
|
// First we make sure all percentages sum to 100
|
|
// These should all be locations which have no sublocations
|
|
|
|
totalSum := float64(0)
|
|
|
|
for _, continentPercentage := range continentPercentagesMap{
|
|
totalSum += continentPercentage
|
|
}
|
|
for _, regionPercentage := range regionPercentagesMap{
|
|
totalSum += regionPercentage
|
|
}
|
|
for _, subregionPercentage := range subregionPercentagesMap{
|
|
totalSum += subregionPercentage
|
|
}
|
|
|
|
if (totalSum != 100){
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
|
|
// Now we add all parent locations
|
|
|
|
mapsAreValid, filledContinentPercentagesMap, _, _, err := AddMissingParentsToAncestryCompositionMaps_23andMe(continentPercentagesMap, regionPercentagesMap, subregionPercentagesMap)
|
|
if (err != nil){ return false, nil, nil, nil, err }
|
|
if (mapsAreValid == false){
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
|
|
// Now we make sure continents total is valid
|
|
// If continents total is valid, the region/subregion totals should also be valid
|
|
|
|
continentsTotal := float64(0)
|
|
|
|
for _, continentPercentage := range filledContinentPercentagesMap{
|
|
continentsTotal += continentPercentage
|
|
}
|
|
|
|
if (continentsTotal != 100){
|
|
// Attribute is invalid: Continents do not sum to 100
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
|
|
return true, continentPercentagesMap, regionPercentagesMap, subregionPercentagesMap, nil
|
|
}
|
|
|
|
// This function will return new maps with the parent locations and their percentages added
|
|
// It takes as input location maps that only contain locations which have no sublocations
|
|
// It is used to read an ancestry composition attribute, and to show a user's progress when they are building their composition
|
|
// It does not verify that the percentages sum to 100, because it receives partially completed maps during the build process
|
|
// It verifies all location names are valid
|
|
//Outputs:
|
|
// -bool: Input maps are valid
|
|
// -map[string]float64: Continent percentages map (with parents added)
|
|
// -map[string]float64: Region percentages map (with parents added)
|
|
// -map[string]float64: Subregion percentages map (with parents added)
|
|
// -error (a bug exists in the function)
|
|
func AddMissingParentsToAncestryCompositionMaps_23andMe(inputContinentPercentagesMap map[string]float64, inputRegionPercentagesMap map[string]float64, inputSubregionPercentagesMap map[string]float64)(bool, map[string]float64, map[string]float64, map[string]float64, error){
|
|
|
|
// We copy the input maps because the we only need to add the missing parents for display, not for encoding
|
|
// We need to maintain the integrity of the input maps
|
|
// The missing parents are only added whenever the composition attribute is displayed in the GUI.
|
|
// This allows the attribute to be smaller in size, reducing the size of profiles.
|
|
|
|
continentPercentagesMap := maps.Clone(inputContinentPercentagesMap)
|
|
regionPercentagesMap := maps.Clone(inputRegionPercentagesMap)
|
|
subregionPercentagesMap := maps.Clone(inputSubregionPercentagesMap)
|
|
|
|
allContinentsList := GetAncestryContinentsList_23andMe()
|
|
|
|
// We make sure all continent names are valid
|
|
|
|
for continentName, _ := range continentPercentagesMap{
|
|
|
|
isValid := slices.Contains(allContinentsList, continentName)
|
|
if (isValid == false){
|
|
// Continent name is unknown
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
}
|
|
|
|
// This list will store the number of regions we should have when complete
|
|
// This enables us to detect unknown region names
|
|
expectedRegionsCount := 0
|
|
|
|
// This list will store the number of subregions we should have when complete
|
|
// This enables us to detect unknown subregion names
|
|
expectedSubregionsCount := 0
|
|
|
|
for _, continentName := range allContinentsList{
|
|
|
|
continentRegionsList, err := GetAncestryContinentRegionsList_23andMe(continentName)
|
|
if (err != nil){
|
|
return false, nil, nil, nil, errors.New("GetAncestryContinentRegionsList_23andMe missing continent regions: " + continentName)
|
|
}
|
|
|
|
if (len(continentRegionsList) == 0){
|
|
// This continent has no sublocations
|
|
// If it exists, it should be included
|
|
continue
|
|
}
|
|
|
|
_, exists := continentPercentagesMap[continentName]
|
|
if (exists == true){
|
|
// Ancestry composition is invalid: Contains continent with sublocations
|
|
// All provided locations should have no sublocations
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
|
|
continentPercentage := float64(0)
|
|
|
|
for _, regionName := range continentRegionsList{
|
|
|
|
regionSubregionsList, err := GetAncestryRegionSubregionsList_23andMe(continentName, regionName)
|
|
if (err != nil){
|
|
return false, nil, nil, nil, errors.New("GetAncestryRegionSubregionsList_23andMe missing region subregions: " + regionName)
|
|
}
|
|
|
|
if (len(regionSubregionsList) == 0){
|
|
|
|
regionPercentage, exists := regionPercentagesMap[regionName]
|
|
if (exists == true){
|
|
continentPercentage += regionPercentage
|
|
expectedRegionsCount += 1
|
|
}
|
|
|
|
continue
|
|
}
|
|
|
|
_, exists := regionPercentagesMap[regionName]
|
|
if (exists == true){
|
|
// Ancestry composition is invalid: Contains region with sublocations
|
|
// All provided locations should have no sublocations
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
|
|
regionPercentage := float64(0)
|
|
|
|
for _, subregionName := range regionSubregionsList{
|
|
|
|
subregionPercentage, exists := subregionPercentagesMap[subregionName]
|
|
if (exists == true){
|
|
regionPercentage += subregionPercentage
|
|
expectedSubregionsCount += 1
|
|
}
|
|
}
|
|
|
|
if (regionPercentage != 0){
|
|
regionPercentagesMap[regionName] = regionPercentage
|
|
expectedRegionsCount += 1
|
|
continentPercentage += regionPercentage
|
|
}
|
|
}
|
|
|
|
if (continentPercentage != 0){
|
|
continentPercentagesMap[continentName] = continentPercentage
|
|
}
|
|
}
|
|
|
|
// We make sure no unknown region/subregion names exist
|
|
|
|
if (len(regionPercentagesMap) != expectedRegionsCount){
|
|
// An unknown region must exist
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
if (len(subregionPercentagesMap) != expectedSubregionsCount){
|
|
// An unknown subregion must exist
|
|
return false, nil, nil, nil, nil
|
|
}
|
|
|
|
return true, continentPercentagesMap, regionPercentagesMap, subregionPercentagesMap, nil
|
|
}
|
|
|
|
// This function does not add the missing parent locations
|
|
//Outputs:
|
|
// -map[string]float64: Continent percentages map
|
|
// -map[string]float64: Region percentages map
|
|
// -map[string]float64: Subregion percentages map
|
|
// -error
|
|
func GetOffspringAncestryComposition_23andMe(personAAncestryCompositionAttribute string, personBAncestryCompositionAttribute string)(map[string]float64, map[string]float64, map[string]float64, error){
|
|
|
|
personAAttributeIsValid, personAContinentPercentagesMap, personARegionPercentagesMap, personASubregionPercentagesMap, err := ReadAncestryCompositionAttribute_23andMe(true, personAAncestryCompositionAttribute)
|
|
if (err != nil) { return nil, nil, nil, err }
|
|
if (personAAttributeIsValid == false){
|
|
return nil, nil, nil, errors.New("GetOffspringAncestryComposition_23andMe called with invalid person A ancestry composition attribute: " + personAAncestryCompositionAttribute)
|
|
}
|
|
|
|
personBAttributeIsValid, personBContinentPercentagesMap, personBRegionPercentagesMap, personBSubregionPercentagesMap, err := ReadAncestryCompositionAttribute_23andMe(true, personBAncestryCompositionAttribute)
|
|
if (err != nil) { return nil, nil, nil, err }
|
|
if (personBAttributeIsValid == false){
|
|
return nil, nil, nil, errors.New("GetOffspringAncestryComposition_23andMe called with invalid person B ancestry composition attribute: " + personBAncestryCompositionAttribute)
|
|
}
|
|
|
|
offspringContinentPercentagesMap := make(map[string]float64)
|
|
|
|
for continentName, continentPercentage := range personAContinentPercentagesMap{
|
|
|
|
personAPercentage := continentPercentage/2
|
|
|
|
offspringContinentPercentagesMap[continentName] = personAPercentage
|
|
}
|
|
|
|
for continentName, continentPercentage := range personBContinentPercentagesMap{
|
|
|
|
personBPercentage := continentPercentage/2
|
|
|
|
offspringContinentPercentagesMap[continentName] += personBPercentage
|
|
}
|
|
|
|
offspringRegionPercentagesMap := make(map[string]float64)
|
|
|
|
for regionName, regionPercentage := range personARegionPercentagesMap{
|
|
|
|
personAPercentage := regionPercentage/2
|
|
|
|
offspringRegionPercentagesMap[regionName] = personAPercentage
|
|
}
|
|
|
|
for regionName, regionPercentage := range personBRegionPercentagesMap{
|
|
|
|
personBPercentage := regionPercentage/2
|
|
|
|
offspringRegionPercentagesMap[regionName] += personBPercentage
|
|
}
|
|
|
|
offspringSubregionPercentagesMap := make(map[string]float64)
|
|
|
|
for subregionName, subregionPercentage := range personASubregionPercentagesMap{
|
|
|
|
personAPercentage := subregionPercentage/2
|
|
|
|
offspringSubregionPercentagesMap[subregionName] = personAPercentage
|
|
}
|
|
|
|
for subregionName, subregionPercentage := range personBSubregionPercentagesMap{
|
|
|
|
personBPercentage := subregionPercentage/2
|
|
|
|
offspringSubregionPercentagesMap[subregionName] += personBPercentage
|
|
}
|
|
|
|
return offspringContinentPercentagesMap, offspringRegionPercentagesMap, offspringSubregionPercentagesMap, nil
|
|
}
|
|
|
|
|
|
// Ancestral similarity aims to compare how closely related a 2 user's ancestors are.
|
|
|
|
//Outputs:
|
|
// -int: A percentage between 0 and 100
|
|
// -error
|
|
func GetAncestralSimilarity_23andMe(verifyAttributes bool, person1AncestryCompositionAttribute string, person2AncestryCompositionAttribute string)(int, error){
|
|
|
|
person1AttributeIsValid, person1ContinentPercentagesMap, person1RegionPercentagesMap, person1SubregionPercentagesMap, err := ReadAncestryCompositionAttribute_23andMe(verifyAttributes, person1AncestryCompositionAttribute)
|
|
if (err != nil) { return 0, err }
|
|
if (person1AttributeIsValid == false){
|
|
return 0, errors.New("GetAncestralSimilarity_23andMe called with invalid person1 23andMe_AncestryComposition attribute: " + person1AncestryCompositionAttribute)
|
|
}
|
|
|
|
person2AttributeIsValid, person2ContinentPercentagesMap, person2RegionPercentagesMap, person2SubregionPercentagesMap, err := ReadAncestryCompositionAttribute_23andMe(verifyAttributes, person2AncestryCompositionAttribute)
|
|
if (err != nil) { return 0, err }
|
|
if (person2AttributeIsValid == false){
|
|
return 0, errors.New("GetAncestralSimilarity_23andMe called with invalid person2 23andMe_AncestryComposition attribute: " + person2AncestryCompositionAttribute)
|
|
}
|
|
|
|
continentsAreEqual := maps.Equal(person1ContinentPercentagesMap, person2ContinentPercentagesMap)
|
|
if (continentsAreEqual == true){
|
|
|
|
regionsAreEqual := maps.Equal(person1RegionPercentagesMap, person2RegionPercentagesMap)
|
|
if (regionsAreEqual == true){
|
|
|
|
subregionsAreEqual := maps.Equal(person1SubregionPercentagesMap, person2SubregionPercentagesMap)
|
|
if (subregionsAreEqual == true){
|
|
|
|
return 100, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
totalSimilarity := float64(0)
|
|
|
|
// Different continents are counted as having no similarity.
|
|
// Different regions within the same continent are counted as having 70% similarity
|
|
// Different subregions within the same region are counted as having 80% similarity
|
|
|
|
for continentName, person1ContinentPercentage := range person1ContinentPercentagesMap{
|
|
|
|
person2ContinentPercentage, exists := person2ContinentPercentagesMap[continentName]
|
|
if (exists == true){
|
|
|
|
continentSimilarity := min(person1ContinentPercentage, person2ContinentPercentage)
|
|
totalSimilarity += continentSimilarity
|
|
}
|
|
}
|
|
|
|
// We iterate through the region/subregion maps twice
|
|
// First we account for exact region/subregion similarity
|
|
// Next, we account for same-parent-location region/subregion similarity
|
|
|
|
person1RegionsList := helpers.GetListOfMapKeys(person1RegionPercentagesMap)
|
|
|
|
for _, regionName := range person1RegionsList{
|
|
|
|
person1RegionPercentage, exists := person1RegionPercentagesMap[regionName]
|
|
if (exists == false){
|
|
return 0, errors.New("person1RegionPercentagesMap missing regionName: " + regionName)
|
|
}
|
|
|
|
person2RegionPercentage, exists := person2RegionPercentagesMap[regionName]
|
|
if (exists == true){
|
|
regionSimilarity := min(person1RegionPercentage, person2RegionPercentage)
|
|
totalSimilarity += regionSimilarity
|
|
|
|
// We subtract the region similarity so we don't
|
|
// count it twice when we account for same-parent-location location similarity
|
|
person1RegionPercentagesMap[regionName] -= regionSimilarity
|
|
person2RegionPercentagesMap[regionName] -= regionSimilarity
|
|
}
|
|
}
|
|
|
|
for person1RegionName, person1RegionPercentage := range person1RegionPercentagesMap{
|
|
|
|
if (person1RegionPercentage == 0){
|
|
continue
|
|
}
|
|
|
|
person1RegionContinent, err := GetAncestryRegionParentContinent_23andMe(person1RegionName)
|
|
if (err != nil){ return 0, err }
|
|
|
|
// We iterate through person2 regions and find other regions belonging to the same continent
|
|
|
|
for person2RegionName, person2RegionPercentage := range person2RegionPercentagesMap{
|
|
|
|
person2RegionContinent, err := GetAncestryRegionParentContinent_23andMe(person2RegionName)
|
|
if (err != nil){ return 0, err }
|
|
|
|
if (person1RegionContinent == person2RegionContinent){
|
|
|
|
regionSimilarity := min(person1RegionPercentage, person2RegionPercentage)
|
|
|
|
// We say regions from the same continent are 70% similar
|
|
totalSimilarity += (regionSimilarity * 0.7)
|
|
|
|
person1RegionPercentage -= regionSimilarity
|
|
person2RegionPercentagesMap[person2RegionName] -= regionSimilarity
|
|
}
|
|
|
|
if (person1RegionPercentage == 0){
|
|
// We have accounted for any similar regions person2 has
|
|
// We will continue on to the next region in our composition
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
person1SubregionsList := helpers.GetListOfMapKeys(person1SubregionPercentagesMap)
|
|
|
|
for _, subregionName := range person1SubregionsList{
|
|
|
|
person1SubregionPercentage, exists := person1SubregionPercentagesMap[subregionName]
|
|
if (exists == false){
|
|
return 0, errors.New("person1SubregionPercentagesMap missing subregionName: " + subregionName)
|
|
}
|
|
|
|
person2SubregionPercentage, exists := person2SubregionPercentagesMap[subregionName]
|
|
if (exists == true){
|
|
subregionSimilarity := min(person1SubregionPercentage, person2SubregionPercentage)
|
|
totalSimilarity += subregionSimilarity
|
|
|
|
// We subtract the subregion similarity so we don't
|
|
// count it twice when we account for same-parent-location location similarity
|
|
person1SubregionPercentagesMap[subregionName] -= subregionSimilarity
|
|
person2SubregionPercentagesMap[subregionName] -= subregionSimilarity
|
|
}
|
|
}
|
|
|
|
for person1SubregionName, person1SubregionPercentage := range person1SubregionPercentagesMap{
|
|
|
|
if (person1SubregionPercentage == 0){
|
|
continue
|
|
}
|
|
|
|
person1SubregionRegion, err := GetAncestrySubregionParentRegion_23andMe(person1SubregionName)
|
|
if (err != nil){ return 0, err }
|
|
|
|
// We iterate through person2 subregions and find other subregions belonging to the same region
|
|
|
|
for person2SubregionName, person2SubregionPercentage := range person2SubregionPercentagesMap{
|
|
|
|
person2SubregionRegion, err := GetAncestrySubregionParentRegion_23andMe(person2SubregionName)
|
|
if (err != nil){ return 0, err }
|
|
|
|
if (person1SubregionRegion == person2SubregionRegion){
|
|
|
|
subregionSimilarity := min(person1SubregionPercentage, person2SubregionPercentage)
|
|
|
|
// We say subregions from the same region are 80% similar
|
|
totalSimilarity += (subregionSimilarity * 0.8)
|
|
|
|
person1SubregionPercentage -= subregionSimilarity
|
|
person2SubregionPercentagesMap[person2SubregionName] -= subregionSimilarity
|
|
}
|
|
|
|
if (person1SubregionPercentage == 0){
|
|
// We have accounted for any similar subregions person2 has
|
|
// We will continue on to the next subregion in our composition
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
totalSimilarityInt, err := helpers.FloorFloat64ToInt(totalSimilarity)
|
|
if (err != nil) { return 0, err }
|
|
|
|
if (totalSimilarityInt < 0 || totalSimilarityInt > 100){
|
|
totalSimilarityString := helpers.ConvertIntToString(totalSimilarityInt)
|
|
return 0, errors.New("totalSimilarity is out of bounds after calculating ancestral similarity: " + totalSimilarityString)
|
|
}
|
|
|
|
return totalSimilarityInt, nil
|
|
}
|
|
|
|
// This is used to get all the names needed for translations
|
|
func GetAllAncestryLocationsList_23andMe()([]string, error){
|
|
|
|
continentsList := GetAncestryContinentsList_23andMe()
|
|
|
|
allLocationsList := make([]string, 0, len(continentsList))
|
|
|
|
for _, continentName := range continentsList{
|
|
|
|
allLocationsList = append(allLocationsList, continentName)
|
|
|
|
continentRegionsList, err := GetAncestryContinentRegionsList_23andMe(continentName)
|
|
if (err != nil) { return nil, err }
|
|
|
|
for _, regionName := range continentRegionsList{
|
|
|
|
allLocationsList = append(allLocationsList, regionName)
|
|
|
|
subregionsList, err := GetAncestryRegionSubregionsList_23andMe(continentName, regionName)
|
|
if (err != nil) { return nil, err }
|
|
|
|
allLocationsList = append(allLocationsList, subregionsList...)
|
|
}
|
|
}
|
|
|
|
return allLocationsList, nil
|
|
}
|
|
|
|
// GetAncestryContinentsList_23andMe returns the names of all ancestry continents.
// Categories are structured as follows: Continent -> Region -> Subregion
// A new list is created on every call.
func GetAncestryContinentsList_23andMe() []string {

	return []string{
		"Central & South Asian",
		"East Asian",
		"European",
		"Indigenous American",
		"Melanesian",
		"Sub-Saharan African",
		"Western Asian & North African",
		"Unassigned",
	}
}
|
|
|
|
// GetAncestryContinentRegionsList_23andMe returns the names of the regions which belong to the provided continent.
// Continents without regions return an empty (non-nil) list.
//Outputs:
//	-[]string: Region names
//	-error: The continent name is unknown
func GetAncestryContinentRegionsList_23andMe(continentName string) ([]string, error) {

	switch continentName {

	case "Sub-Saharan African":
		return []string{
			"West African",
			"Northern East African",
			"Congolese & Southern East African",
			"African Hunter-Gatherer",
			"Broadly Sub-Saharan African",
		}, nil

	case "East Asian":
		return []string{
			"North Asian",
			"Chinese",
			"Vietnamese",
			"Filipino & Austronesian",
			"Indonesian, Khmer, Thai & Myanma",
			"Chinese Dai",
			"Japanese",
			"Korean",
			"Broadly East Asian",
		}, nil

	case "European":
		return []string{
			"Northwestern European",
			"Southern European",
			"Eastern European",
			"Ashkenazi Jewish",
			"Broadly European",
		}, nil

	case "Western Asian & North African":
		return []string{
			"Northern West Asian",
			"Arab, Egyptian & Levantine",
			"North African",
			"Broadly Western Asian & North African",
		}, nil

	case "Central & South Asian":
		return []string{
			"Central Asian",
			"Northern Indian & Pakistani",
			"Bengali & Northeast Indian",
			"Gujarati Patidar",
			"Southern Indian Subgroup",
			"Southern Indian & Sri Lankan",
			"Malayali Subgroup",
			"Broadly Central & South Asian",
		}, nil

	case "Melanesian", "Indigenous American", "Unassigned":
		// These continents have no regions
		return []string{}, nil
	}

	return nil, errors.New("GetAncestryContinentRegionsList_23andMe called with unknown continentName: " + continentName)
}
|
|
|
|
|
|
// GetAncestryRegionSubregionsList_23andMe returns the names of the subregions which belong to the provided region.
// Regions without subregions return an empty (non-nil) list.
//Outputs:
//	-[]string: Subregion names
//	-error: The continent is unknown or has no regions, or the region does not belong to the continent
func GetAncestryRegionSubregionsList_23andMe(continentName string, regionName string) ([]string, error) {

	// This stays nil unless the region is recognized within its continent
	var subregionsList []string

	continentHasRegions := true

	switch continentName {

	case "Sub-Saharan African":

		switch regionName {
		case "West African":
			subregionsList = []string{
				"Senegambian & Guinean",
				"Ghanaian, Liberian & Sierra Leonean",
				"Nigerian",
				"Broadly West African",
			}
		case "Northern East African":
			subregionsList = []string{
				"Sudanese",
				"Ethiopian & Eritrean",
				"Somali",
				"Broadly Northern East African",
			}
		case "Congolese & Southern East African":
			subregionsList = []string{
				"Angolan & Congolese",
				"Southern East African",
				"Broadly Congolese & Southern East African",
			}
		case "African Hunter-Gatherer", "Broadly Sub-Saharan African":
			subregionsList = []string{}
		}

	case "East Asian":

		switch regionName {
		case "North Asian":
			subregionsList = []string{
				"Siberian",
				"Manchurian & Mongolian",
				"Broadly North Asian",
			}
		case "Chinese":
			subregionsList = []string{
				"Northern Chinese & Tibetan",
				"Southern Chinese & Taiwanese",
				"South Chinese",
				"Broadly Chinese",
			}
		case "Vietnamese",
			"Filipino & Austronesian",
			"Indonesian, Khmer, Thai & Myanma",
			"Chinese Dai",
			"Japanese",
			"Korean",
			"Broadly East Asian":
			subregionsList = []string{}
		}

	case "European":

		switch regionName {
		case "Northwestern European":
			subregionsList = []string{
				"British & Irish",
				"Finnish",
				"French & German",
				"Scandinavian",
				"Broadly Northwestern European",
			}
		case "Southern European":
			subregionsList = []string{
				"Greek & Balkan",
				"Spanish & Portuguese",
				"Italian",
				"Sardinian",
				"Broadly Southern European",
			}
		case "Eastern European", "Ashkenazi Jewish", "Broadly European":
			subregionsList = []string{}
		}

	case "Western Asian & North African":

		switch regionName {
		case "Northern West Asian":
			subregionsList = []string{
				"Cypriot",
				"Anatolian",
				"Iranian, Caucasian & Mesopotamian",
				"Broadly Northern West Asian",
			}
		case "Arab, Egyptian & Levantine":
			subregionsList = []string{
				"Peninsular Arab",
				"Levantine",
				"Egyptian",
				"Coptic Egyptian",
				"Broadly Arab, Egyptian, & Levantine",
			}
		case "North African", "Broadly Western Asian & North African":
			subregionsList = []string{}
		}

	case "Central & South Asian":

		// None of these regions have subregions
		switch regionName {
		case "Central Asian",
			"Northern Indian & Pakistani",
			"Bengali & Northeast Indian",
			"Gujarati Patidar",
			"Southern Indian Subgroup",
			"Southern Indian & Sri Lankan",
			"Malayali Subgroup",
			"Broadly Central & South Asian":
			subregionsList = []string{}
		}

	default:
		continentHasRegions = false
	}

	if !continentHasRegions {
		return nil, errors.New("GetAncestryRegionSubregionsList_23andMe called with unknown continent name: " + continentName)
	}
	if subregionsList == nil {
		return nil, errors.New("GetAncestryRegionSubregionsList_23andMe called with unknown region for " + continentName + ": " + regionName)
	}

	return subregionsList, nil
}
|
|
|
|
|
|
|
|
// GetAncestryRegionParentContinent_23andMe returns the continent that a region belongs to
//Outputs:
//	-string: Continent name
//	-error: The region name is unknown
func GetAncestryRegionParentContinent_23andMe(regionName string) (string, error) {

	switch regionName {

	case "West African",
		"Northern East African",
		"Congolese & Southern East African",
		"African Hunter-Gatherer",
		"Broadly Sub-Saharan African":

		return "Sub-Saharan African", nil

	case "North Asian",
		"Chinese",
		"Vietnamese",
		"Filipino & Austronesian",
		"Indonesian, Khmer, Thai & Myanma",
		"Chinese Dai",
		"Japanese",
		"Korean",
		"Broadly East Asian":

		return "East Asian", nil

	case "Northwestern European",
		"Southern European",
		"Eastern European",
		"Ashkenazi Jewish",
		"Broadly European":

		return "European", nil

	case "Northern West Asian",
		"Arab, Egyptian & Levantine",
		"North African",
		"Broadly Western Asian & North African":

		return "Western Asian & North African", nil

	case "Central Asian",
		"Northern Indian & Pakistani",
		"Bengali & Northeast Indian",
		"Gujarati Patidar",
		"Southern Indian Subgroup",
		"Southern Indian & Sri Lankan",
		"Malayali Subgroup",
		"Broadly Central & South Asian":

		return "Central & South Asian", nil
	}

	// The message names this function, so the error can be traced to its source
	return "", errors.New("GetAncestryRegionParentContinent_23andMe called with unknown region: " + regionName)
}
|
|
|
|
// GetAncestrySubregionParentRegion_23andMe returns the region that a subregion belongs to
//Outputs:
//	-string: Region name
//	-error: The subregion name is unknown
func GetAncestrySubregionParentRegion_23andMe(subregionName string) (string, error) {

	switch subregionName {

	// Continent == "Sub-Saharan African"

	case "Senegambian & Guinean",
		"Ghanaian, Liberian & Sierra Leonean",
		"Nigerian",
		"Broadly West African":

		return "West African", nil

	case "Sudanese",
		"Ethiopian & Eritrean",
		"Somali",
		"Broadly Northern East African":

		return "Northern East African", nil

	case "Angolan & Congolese",
		"Southern East African",
		"Broadly Congolese & Southern East African":

		return "Congolese & Southern East African", nil

	// Continent == "East Asian"

	case "Siberian",
		"Manchurian & Mongolian",
		"Broadly North Asian":

		return "North Asian", nil

	case "Northern Chinese & Tibetan",
		"Southern Chinese & Taiwanese",
		"South Chinese",
		"Broadly Chinese":

		return "Chinese", nil

	// Continent == "European"

	case "British & Irish",
		"Finnish",
		"French & German",
		"Scandinavian",
		"Broadly Northwestern European":

		return "Northwestern European", nil

	case "Greek & Balkan",
		"Spanish & Portuguese",
		"Italian",
		"Sardinian",
		"Broadly Southern European":

		return "Southern European", nil

	// Continent == "Western Asian & North African"

	case "Cypriot",
		"Anatolian",
		"Iranian, Caucasian & Mesopotamian",
		"Broadly Northern West Asian":

		return "Northern West Asian", nil

	case "Peninsular Arab",
		"Levantine",
		"Egyptian",
		"Coptic Egyptian",
		"Broadly Arab, Egyptian, & Levantine":

		return "Arab, Egyptian & Levantine", nil
	}

	// The message names this function, so the error can be traced to its source
	return "", errors.New("GetAncestrySubregionParentRegion_23andMe called with unknown subregion: " + subregionName)
}
|
|
|
|
|