Golang: Parse IMDb database
by admin on Sep.01, 2024, under News
The website IMDb (Internet Movie Database) provides datasets for non-commercial use. The datasets are refreshed daily and published as gzip-compressed TSV (tab-separated values) files, which can be downloaded from this link.
We’ll now write a simple Go program to parse the data provided by IMDb:
package main
import (
	"compress/gzip"
	"encoding/csv"
	"fmt"
	"io"
	"os"
)
// main looks up a single title in the gzip-compressed IMDb
// "title.basics.tsv.gz" dataset in the current working directory and prints
// its columns, one per line, to standard output.
//
// Diagnostics (file cannot be opened, stream is not valid gzip, a record
// cannot be parsed, or the title ID is not present) are written to standard
// error so they never mix with the result.
func main() {
	const (
		datasetPath = "title.basics.tsv.gz"
		targetID    = "tt0111161" // Example: "The Shawshank Redemption"
	)

	// Column labels in the order the program expects them in the file:
	// tconst, titleType, primaryTitle, originalTitle, isAdult, startYear,
	// endYear, runtimeMinutes, genres.
	labels := [...]string{
		"Title ID:",
		"Title Type:",
		"Primary Title:",
		"Original Title:",
		"Is Adult:",
		"Start Year:",
		"End Year:",
		"Runtime Minutes:",
		"Genres:",
	}

	file, err := os.Open(datasetPath)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error opening file:", err)
		return
	}
	defer file.Close()

	gz, err := gzip.NewReader(file)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error decompressing file:", err)
		return
	}
	defer gz.Close()

	tsvReader := csv.NewReader(gz)
	tsvReader.Comma = '\t' // TSV files are tab-separated
	// LazyQuotes tolerates stray double quotes inside fields.
	// NOTE(review): encoding/csv still treats a field that *starts* with a
	// double quote as a quoted field, which may merge rows if the data uses
	// quotes literally - confirm against the dataset.
	tsvReader.LazyQuotes = true
	// Each record is only inspected within its own iteration, so the reader
	// may recycle the backing slice instead of allocating one per row.
	tsvReader.ReuseRecord = true

	// Stream records until the requested title ID shows up.
	for {
		record, err := tsvReader.Read()
		if err == io.EOF {
			// Clean end of input: the file was read completely without a match.
			fmt.Fprintln(os.Stderr, "Title ID not found:", targetID)
			return
		}
		if err != nil {
			fmt.Fprintln(os.Stderr, "Error reading record:", err)
			return
		}
		if len(record) == 0 || record[0] != targetID {
			continue
		}
		// Guard the fixed column indexes below against a short row.
		if len(record) < len(labels) {
			fmt.Fprintf(os.Stderr, "Record for %s has %d fields, expected at least %d\n",
				targetID, len(record), len(labels))
			return
		}
		for i, label := range labels {
			fmt.Println(label, record[i])
		}
		return
	}
}
Running this program will output something like:
Title ID: tt0111161
Title Type: movie
Primary Title: The Shawshank Redemption
Original Title: The Shawshank Redemption
Is Adult: 0
Start Year: 1994
End Year: \N
Runtime Minutes: 142
Genres: Drama



