seekia/internal/genetics/readBiobankData/openSNP.go

401 lines
7.6 KiB
Go
Raw Normal View History

package readBiobankData
// File openSNP.go provides a data structure and a function to read phenotype data from the OpenSNP.org biobank.
import "seekia/internal/helpers"
import "encoding/csv"
import "os"
import "io"
// PhenotypeData_OpenSNP holds the phenotype values extracted for one openSNP user.
//
// Each trait is stored as a pair of fields: an "IsKnown" flag and the value itself.
// When the flag is false, the value field is left at its zero value and should be ignored.
type PhenotypeData_OpenSNP struct {

	// Identifier of the user this data belongs to
	UserID int

	EyeColorIsKnown bool

	// One of "Green", "Blue", "Hazel", or "Brown"
	EyeColor string

	LactoseToleranceIsKnown bool

	// true == User is lactose tolerant
	// false == User is lactose intolerant
	LactoseTolerance bool

	// ReadOpenSNPPhenotypesFile does not parse hair color yet, so it leaves
	// these two fields at their zero values.
	HairColorIsKnown bool
	HairColor        string

	HeightIsKnown bool

	// Height in centimeters
	Height float64
}
// Indexes of the columns read from each record of the openSNP phenotypes file.
const (
	openSNPColumnUserID           = 0
	openSNPColumnEyeColor         = 5
	openSNPColumnLactoseTolerance = 6
	//TODO: Hair color (column 11) is not parsed yet
	openSNPColumnHeight = 13

	// Smallest record width that still contains every column read above
	openSNPMinimumColumns = openSNPColumnHeight + 1
)

// ReadOpenSNPPhenotypesFile reads the phenotypes_202308230100.csv file in the openSNP biobank data.
//
// The file is parsed as semicolon-delimited CSV with lazy quotes. The first record is
// treated as the header and skipped. Each remaining record describes one user; if a user ID
// appears more than once, only the first record for that user is used.
//
// Free-text answers which are not recognized are reported as unknown rather than as an error.
//
// Outputs:
//
//	-bool: Able to read file (file is well formed and not corrupt)
//	-[]PhenotypeData_OpenSNP: One entry per distinct user ID (nil if the file could not be read)
func ReadOpenSNPPhenotypesFile(fileObject *os.File) (bool, []PhenotypeData_OpenSNP) {

	csvFileReader := csv.NewReader(fileObject)
	csvFileReader.LazyQuotes = true
	csvFileReader.Comma = ';'

	// First we read the first line (header line)
	_, err := csvFileReader.Read()
	if err != nil {
		// File is corrupt
		return false, nil
	}

	// Now we iterate through each user's phenotype data

	//Map Structure: User ID -> Phenotype data object
	userPhenotypeDataMap := make(map[int]PhenotypeData_OpenSNP)

	for {
		userDataLineSlice, err := csvFileReader.Read()
		if err != nil {
			if err == io.EOF {
				// We have reached the end of the file
				break
			}
			// File is corrupt
			return false, nil
		}

		// The csv reader only guarantees that every record is as wide as the header.
		// We must check the width ourselves, otherwise a file with too few columns
		// would cause an index out of range panic below.
		if len(userDataLineSlice) < openSNPMinimumColumns {
			// File is corrupt
			return false, nil
		}

		userID, err := helpers.ConvertStringToInt(userDataLineSlice[openSNPColumnUserID])
		if err != nil {
			// File is corrupt
			return false, nil
		}

		if _, exists := userPhenotypeDataMap[userID]; exists {
			// This user has multiple entries
			// Each entry is identical except for the raw genome filename
			// We keep the first one and skip the rest
			continue
		}

		userEyeColorIsKnown, userEyeColor := parseOpenSNPEyeColor(userDataLineSlice[openSNPColumnEyeColor])
		userLactoseToleranceIsKnown, userLactoseTolerance := parseOpenSNPLactoseTolerance(userDataLineSlice[openSNPColumnLactoseTolerance])
		userHeightIsKnown, userHeight := parseOpenSNPHeight(userDataLineSlice[openSNPColumnHeight])

		userPhenotypeDataMap[userID] = PhenotypeData_OpenSNP{
			UserID:                  userID,
			EyeColorIsKnown:         userEyeColorIsKnown,
			EyeColor:                userEyeColor,
			LactoseToleranceIsKnown: userLactoseToleranceIsKnown,
			LactoseTolerance:        userLactoseTolerance,
			//TODO: Parse hair color. Until then it is always reported as unknown.
			HairColorIsKnown: false,
			HairColor:        "",
			HeightIsKnown:    userHeightIsKnown,
			Height:           userHeight,
		}
	}

	userPhenotypeDataList := helpers.GetListOfMapValues(userPhenotypeDataMap)

	return true, userPhenotypeDataList
}

// parseOpenSNPEyeColor maps a raw eye color answer to one of "Brown", "Hazel", "Blue" or "Green".
// Matching is exact (case and whitespace sensitive).
//
// Outputs:
//
//	-bool: User eye color is known
//	-string: User eye color ("" if unknown)
func parseOpenSNPEyeColor(userEyeColorRaw string) (bool, string) {

	switch userEyeColorRaw {
	case "-":
		// No answer was provided
		return false, ""

	case "Brown",
		"brown",
		"Dark brown",
		"Brown/black",
		"Grey brown":
		return true, "Brown"

	case "Hazel",
		"hazel",
		"Brown-green",
		"brown-green",
		"Hazel/Light Brown",
		"Hazel (light brown, dark green, dark blue)",
		"Brown-amber",
		"Indeterminate brown-green with a subtle grey caste",
		"Hazel (brown/green)",
		"Green-hazel",
		"Amber - (yellow/ocre brown)",
		"Hazel/light brown",
		"Green-brown",
		"green-brown",
		"Brown - brown and green in bright sunlight",
		"Hazel/yellow",
		"Brown-(green when external temperature rises)",
		"Ambar-green",
		"Olive-brown ringing burnt umber-brown",
		"Green with brown freckles",
		"Green-Hazel",
		"Ambar-Green",
		"Brown-Amber",
		"Hazel/Yellow",
		"Brown center starburst, amber and olive green, with dark gray outer ring":
		return true, "Hazel"

	case "Blue",
		"Blue-grey",
		"Blue grey",
		"Gray-blue",
		"Blue-grey with central heterochromia",
		"Dark blue",
		"blue",
		"Light blue-green",
		"blue-grey",
		"Dark Grayish-Blue Eyes (like a stone)":
		return true, "Blue"

	case "Green",
		"Green ",
		"green",
		"Green-gray",
		"Blue-green ",
		"Blue-green",
		"blue-green ",
		"Light-mixed green",
		"blue-green",
		"Blue with a yellow ring of flecks that make my eyes look green depending on the light or my mood",
		"Light-mixed Green",
		"Blue-green heterochromia",
		"Blue-green-grey":
		return true, "Green"

		//TODO: Add grey as its own separate color?
	}

	// Unrecognized answer
	return false, ""
}

// parseOpenSNPLactoseTolerance interprets a raw answer from the lactose column.
// Matching is exact (case and whitespace sensitive).
//
// "Yes" is treated as intolerant and "No"/"False" as tolerant.
// NOTE(review): this presumably means the column asks about lactose intolerance - confirm against the openSNP data.
//
// Outputs:
//
//	-bool: User lactose tolerance is known
//	-bool: User lactose tolerance (true == Is lactose tolerant)
func parseOpenSNPLactoseTolerance(userLactoseToleranceRaw string) (bool, bool) {

	switch userLactoseToleranceRaw {
	case "-":
		// No answer was provided
		return false, false

	case "Yes",
		"Lactose-intolerant",
		"Lactose intolerant",
		"lactose-intolerant",
		" allergic to all forms of dairy ",
		" Allergic to all forms of dairy ",
		"Severe gi pain ",
		"severe GI pain ":
		return true, false

	case "No",
		"Lactose-tolerant",
		"Lactose tolerant",
		"lactose-tolerant",
		"lactose tolerant",
		"False":
		return true, true
	}

	// Unrecognized answer
	return false, false
}

// parseOpenSNPHeight converts a raw height answer to centimeters.
// Only the exact spellings listed below are recognized.
//
// Outputs:
//
//	-bool: User height is known
//	-float64: User height (in centimeters, 0 if unknown)
func parseOpenSNPHeight(userHeightRaw string) (bool, float64) {

	switch userHeightRaw {
	case "-":
		// No answer was provided
		return false, 0

	case `4'0"`:
		return true, 121.92
	case `4'1"`:
		return true, 124.46
	case `4'2"`:
		return true, 127
	case `4'3"`:
		return true, 129.54
	case `4'4"`:
		return true, 132.08
	case `4'5"`:
		return true, 134.62
	case `4'6"`:
		return true, 137.16
	case `4'7"`:
		return true, 139.7
	case `4'8"`:
		return true, 142.24
	case `4'9"`:
		return true, 144.78
	case `4'10"`:
		return true, 147.32
	case `4'11"`:
		return true, 149.86
	case `5'`:
		return true, 152.4
	case `5'1"`:
		return true, 154.94
	case `5'2"`:
		return true, 157.48
	case `5'3"`, `5'3''`, `160 cm`:
		// NOTE(review): 5'3" is 160.02 cm. It shares the "160 cm" result here,
		// so it is reported as exactly 160. Confirm this rounding is intended.
		return true, 160
	case `5'4"`:
		return true, 162.56
	case `5'5"`:
		return true, 165.1
	case `5'6"`:
		return true, 167.64
	case `168 cm`:
		return true, 168
	case `5'7"`:
		return true, 170.18
	case `5'8"`:
		return true, 172.72
	case `5'9"`:
		return true, 175.26
	case `5'10"`, `5'10''`:
		return true, 177.8
	case `179 cm`:
		return true, 179
	case `180cm`:
		return true, 180
	case `5'11"`:
		return true, 180.34
	case `6'`:
		return true, 182.88
	case `183 cm`:
		return true, 183
	case `6'1"`:
		return true, 185.42
	case `6'2"`:
		return true, 187.96
	case `6'3"`:
		return true, 190.5
	case `6'4"`:
		return true, 193.04
	case `6'5"`:
		return true, 195.58
	case `6'6"`:
		return true, 198.12
	case `6'7"`:
		return true, 200.66
	case `6'8"`:
		return true, 203.2
	case `6'9"`:
		return true, 205.74
	case `6'10"`:
		return true, 208.28
	case `6'11"`:
		return true, 210.82
	case `7'`:
		return true, 213.36

		//TODO: Add more responses
	}

	// Unrecognized answer
	return false, 0
}