Please refer to the official Metaflow documentation (https://docs.metaflow.org) for detailed documentation and tutorials.
================================================
FILE: R/inst/run.R
================================================
# Command line entry point that loads a serialized flow object from an RDS
# file and hands it to the Python side of Metaflow for execution.
#
# Expected invocation: the command line must contain exactly one argument of
# the form `--flowRDS=<path to RDS file>`.
suppressPackageStartupMessages(library(metaflow))

# Collect every command line argument that starts with `--flowRDS`
# (the full argument vector is searched, not only the trailing args).
flowRDS_arg <- Filter(function(arg) {
  startsWith(arg, "--flowRDS")
}, commandArgs())

if (length(flowRDS_arg) == 1 && grepl("=", flowRDS_arg, fixed = TRUE)) {
  # Keep everything after the FIRST "=" so that a path which itself contains
  # "=" is not truncated, and so that an argument without "=" is reported as
  # missing instead of silently turning into NA.
  flowRDS_file <- sub("^[^=]*=", "", flowRDS_arg)
} else {
  stop("missing --flowRDS file command in the command line arguments")
}

if (!file.exists(flowRDS_file)) {
  stop(sprintf("Cannot locate flow RDS file: %s", flowRDS_file))
}

# Restore the flow object and pull out its step functions and flow script.
flow <- readRDS(flowRDS_file)
rfuncs <- flow$get_functions()
r_functions <- reticulate::dict(rfuncs, convert = TRUE)
flow_script <- flow$get_flow()

# Make every step function available by name in the global environment.
for (fname in names(rfuncs)) {
  assign(fname, rfuncs[[fname]], envir = .GlobalEnv)
}

# Predicate selecting the arguments that are NOT the `--flowRDS` argument.
runtime_args <- function(arg) {
  !startsWith(arg, "--flowRDS")
}

# `delay_load = TRUE` defers loading the Python module until first use.
mf <- reticulate::import("metaflow", delay_load = TRUE)

# Hand off to the Python-side `metaflow.R.run`. Arguments are positional.
# NOTE(review): `metaflow_location()`, `container_image()` and `r_version()`
# are not defined in this script; presumably they come from the metaflow
# R package -- confirm.
mf$R$run(
  flow_script,
  r_functions,
  flowRDS_file,
  # trailing command line arguments without the --flowRDS argument
  Filter(runtime_args, commandArgs(trailingOnly = TRUE)),
  # complete command line followed by the --flowRDS argument
  c(commandArgs(trailingOnly = FALSE), flowRDS_arg),
  metaflow_location(flowRDS = flowRDS_file),
  container_image(),
  r_version(),
  paste(R.version.string),
  paste(getRversion())
)

================================================
FILE: R/inst/run_batch.R
================================================
# Command line entry point for the "batch" configuration: installs the R and
# Python dependencies, installs the metaflow R package from the local
# `./metaflow-r` folder, then loads a serialized flow object from an RDS file
# and hands it to the Python side of Metaflow for execution.
#
# Expected invocation: the command line must contain exactly one argument of
# the form `--flowRDS=<path to RDS file>`.
Sys.setenv(R_CONFIG_ACTIVE = "batch")

# Install `dep` from CRAN when it cannot be loaded.
#
# `dep` is a package name given as a character string. `require()` is used
# as the availability check, so a package that is already installed also
# gets attached as a side effect.
install_dep <- function(dep) {
  if (!suppressMessages(require(dep, character.only = TRUE))) {
    suppressMessages(
      install.packages(
        dep,
        quiet = TRUE,
        repos = "https://cloud.r-project.org/"
      )
    )
  }
}

# dependencies for metaflow
invisible(lapply(
  c("R6", "reticulate", "magrittr", "cli", "lubridate", "digest"),
  install_dep
))

# install numpy and pandas in Python to handle R matrix and data.frame
system("python3 -m pip install numpy pandas -qqq")
Sys.setenv(METAFLOW_PYTHON = system("which python3", intern = TRUE))

# the remote code package places the R package under the metaflow-r folder
suppressMessages(
  install.packages("./metaflow-r", quiet = TRUE, repos = NULL, type = "source")
)
suppressWarnings(suppressMessages(
  library(metaflow, warn.conflicts = FALSE, quietly = TRUE)
))

# Collect every command line argument that starts with `--flowRDS`
# (the full argument vector is searched, not only the trailing args).
flowRDS_arg <- Filter(function(arg) {
  startsWith(arg, "--flowRDS")
}, commandArgs())

if (length(flowRDS_arg) == 1 && grepl("=", flowRDS_arg, fixed = TRUE)) {
  # Keep everything after the FIRST "=" so that a path which itself contains
  # "=" is not truncated, and so that an argument without "=" is reported as
  # missing instead of silently turning into NA.
  flowRDS_file <- sub("^[^=]*=", "", flowRDS_arg)
} else {
  stop("missing --flowRDS file command in the command line arguments")
}

if (!file.exists(flowRDS_file)) {
  stop(sprintf("Cannot locate flow RDS file: %s", flowRDS_file))
}

# Restore the flow object and pull out its step functions and flow script.
flow <- readRDS(flowRDS_file)
rfuncs <- flow$get_functions()
r_functions <- reticulate::dict(rfuncs, convert = TRUE)
flow_script <- flow$get_flow()

# Make every step function available by name in the global environment.
for (fname in names(rfuncs)) {
  assign(fname, rfuncs[[fname]], envir = .GlobalEnv)
}

# Predicate selecting the arguments that are NOT the `--flowRDS` argument.
runtime_args <- function(arg) {
  !startsWith(arg, "--flowRDS")
}

# `delay_load = TRUE` defers loading the Python module until first use.
mf <- reticulate::import("metaflow", delay_load = TRUE)

# Hand off to the Python-side `metaflow.R.run`. Arguments are positional.
# NOTE(review): `metaflow_location()`, `container_image()` and `r_version()`
# are not defined in this script; presumably they come from the metaflow
# R package -- confirm.
mf$R$run(
  flow_script,
  r_functions,
  flowRDS_file,
  # trailing command line arguments without the --flowRDS argument
  Filter(runtime_args, commandArgs(trailingOnly = TRUE)),
  # complete command line followed by the --flowRDS argument
  c(commandArgs(trailingOnly = FALSE), flowRDS_arg),
  metaflow_location(flowRDS = flowRDS_file),
  container_image(),
  r_version(),
  paste(R.version.string),
  paste(getRversion())
)

================================================
FILE: R/inst/tutorials/00-helloworld/README.md
================================================
# Episode 00-helloworld: Metaflow says Hi!

**This flow is a simple linear workflow that verifies your installation by printing out 'Metaflow says: Hi!' to the terminal.**

#### Showcasing:
- Basics of Metaflow.
- Step decorator.

#### To play this episode:
1. ```cd tutorials/00-helloworld```
2. ```Rscript helloworld.R show```
3. ```Rscript helloworld.R run```

If you are using RStudio, you can run this script by directly executing `source("helloworld.R")`.

================================================
FILE: R/inst/tutorials/00-helloworld/helloworld.R
================================================
# A flow where Metaflow prints 'Hi'.
# Run this flow to validate that Metaflow is installed correctly.
library(metaflow)

# Step 'start': every flow needs a step with this name, and it is always
# the first step that runs.
start <- function(self) {
  print("HelloFlow is starting.")
}

# Step 'hello': the step in which Metaflow introduces itself.
hello <- function(self) {
  print("Metaflow says: Hi!")
}

# Step 'end': every flow needs a step with this name, and it is always
# the last step that runs.
end <- function(self) {
  print("HelloFlow is all done.")
}

# Wire the three steps into a linear flow (start -> hello -> end) and run it.
metaflow("HelloFlow") %>%
  step(
    step = "start",
    r_function = start,
    next_step = "hello"
  ) %>%
  step(
    step = "hello",
    r_function = hello,
    next_step = "end"
  ) %>%
  step(
    step = "end",
    r_function = end
  ) %>%
  run()

================================================
FILE: R/inst/tutorials/01-playlist/README.md
================================================
# Episode 01-playlist: Let's build you a movie playlist.

**This flow loads a movie metadata CSV file and builds a playlist for your favorite movie genre. Everything in Metaflow is versioned, so you can run it multiple times and view all the historical playlists with the Metaflow client in an R Markdown Notebook.**

#### Showcasing:
- Basic Metaflow Parameters.
- Running workflow branches in parallel and joining results.
- Using the Metaflow client in an R Markdown Notebook.

#### To play this episode:
##### Execute the flow:
Inside a terminal:
1. ```cd tutorials/01-playlist/```
2. ```Rscript playlist.R show```
3. ```Rscript playlist.R run```
4. ```Rscript playlist.R run --genre comedy```

If you are using RStudio, you can replace the `run()` on the last line of `playlist.R` with `run(genre="comedy")`, and run the episode by executing `source("playlist.R")` in RStudio.

##### Inspect the results
Open the R Markdown file ```playlist.Rmd``` in RStudio and execute the markdown cells.
================================================ FILE: R/inst/tutorials/01-playlist/movies.csv ================================================ movie_title,title_year,genre,gross Avatar,2009,Action,760505847 Pirates of the Caribbean: At World's End,2007,Fantasy,309404152 Spectre,2015,Thriller,200074175 The Dark Knight Rises,2012,Thriller,448130642 John Carter,2012,Action,73058679 Spider-Man 3,2007,Romance,336530303 Tangled,2010,Romance,200807262 Avengers: Age of Ultron,2015,Action,458991599 Harry Potter and the Half-Blood Prince,2009,Fantasy,301956980 Batman v Superman: Dawn of Justice,2016,Adventure,330249062 Superman Returns,2006,Adventure,200069408 Quantum of Solace,2008,Action,168368427 Pirates of the Caribbean: Dead Man's Chest,2006,Action,423032628 The Lone Ranger,2013,Adventure,89289910 Man of Steel,2013,Action,291021565 The Chronicles of Narnia: Prince Caspian,2008,Family,141614023 The Avengers,2012,Adventure,623279547 Pirates of the Caribbean: On Stranger Tides,2011,Action,241063875 Men in Black 3,2012,Sci-Fi,179020854 The Hobbit: The Battle of the Five Armies,2014,Adventure,255108370 The Amazing Spider-Man,2012,Fantasy,262030663 Robin Hood,2010,Drama,105219735 The Hobbit: The Desolation of Smaug,2013,Adventure,258355354 The Golden Compass,2007,Fantasy,70083519 King Kong,2005,Drama,218051260 Titanic,1997,Drama,658672302 Captain America: Civil War,2016,Adventure,407197282 Battleship,2012,Sci-Fi,65173160 Jurassic World,2015,Thriller,652177271 Skyfall,2012,Action,304360277 Spider-Man 2,2004,Romance,373377893 Iron Man 3,2013,Adventure,408992272 Alice in Wonderland,2010,Adventure,334185206 X-Men: The Last Stand,2006,Sci-Fi,234360014 Monsters University,2013,Fantasy,268488329 Transformers: Revenge of the Fallen,2009,Adventure,402076689 Transformers: Age of Extinction,2014,Sci-Fi,245428137 Oz the Great and Powerful,2013,Family,234903076 The Amazing Spider-Man 2,2014,Fantasy,202853933 TRON: Legacy,2010,Sci-Fi,172051787 Cars 2,2011,Comedy,191450875 Green 
Lantern,2011,Action,116593191 Toy Story 3,2010,Adventure,414984497 Terminator Salvation,2009,Action,125320003 Furious 7,2015,Crime,350034110 World War Z,2013,Thriller,202351611 X-Men: Days of Future Past,2014,Fantasy,233914986 Star Trek Into Darkness,2013,Adventure,228756232 Jack the Giant Slayer,2013,Fantasy,65171860 The Great Gatsby,2013,Drama,144812796 Prince of Persia: The Sands of Time,2010,Romance,90755643 Pacific Rim,2013,Action,101785482 Transformers: Dark of the Moon,2011,Sci-Fi,352358779 Indiana Jones and the Kingdom of the Crystal Skull,2008,Action,317011114 Brave,2012,Family,237282182 Star Trek Beyond,2016,Thriller,130468626 WALL·E,2008,Animation,223806889 Rush Hour 3,2007,Action,140080850 2012,2009,Action,166112167 A Christmas Carol,2009,Fantasy,137850096 Jupiter Ascending,2015,Sci-Fi,47375327 The Legend of Tarzan,2016,Romance,124051759 "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe",2005,Adventure,291709845 X-Men: Apocalypse,2016,Adventure,154985087 The Dark Knight,2008,Thriller,533316061 Up,2009,Family,292979556 Monsters vs. Aliens,2009,Action,198332128 Iron Man,2008,Action,318298180 Hugo,2011,Family,73820094 Wild Wild West,1999,Sci-Fi,113745408 The Mummy: Tomb of the Dragon Emperor,2008,Fantasy,102176165 Suicide Squad,2016,Adventure,161087183 Evan Almighty,2007,Family,100289690 Edge of Tomorrow,2014,Adventure,100189501 Waterworld,1995,Sci-Fi,88246220 G.I. 
Joe: The Rise of Cobra,2009,Sci-Fi,150167630 Inside Out,2015,Comedy,356454367 The Jungle Book,2016,Drama,362645141 Iron Man 2,2010,Sci-Fi,312057433 Snow White and the Huntsman,2012,Action,155111815 Maleficent,2014,Fantasy,241407328 Dawn of the Planet of the Apes,2014,Drama,208543795 47 Ronin,2013,Fantasy,38297305 Captain America: The Winter Soldier,2014,Action,259746958 Shrek Forever After,2010,Animation,238371987 Tomorrowland,2015,Action,93417865 Big Hero 6,2014,Adventure,222487711 Wreck-It Ralph,2012,Sci-Fi,189412677 The Polar Express,2004,Animation,665426 Independence Day: Resurgence,2016,Adventure,102315545 How to Train Your Dragon,2010,Adventure,217387997 Terminator 3: Rise of the Machines,2003,Action,150350192 Guardians of the Galaxy,2014,Adventure,333130696 Interstellar,2014,Drama,187991439 Inception,2010,Sci-Fi,292568851 The Fast and the Furious,2001,Crime,144512310 The Curious Case of Benjamin Button,2008,Drama,127490802 X-Men: First Class,2011,Sci-Fi,146405371 The Hunger Games: Mockingjay - Part 2,2015,Sci-Fi,281666058 The Sorcerer's Apprentice,2010,Adventure,63143812 Poseidon,2006,Action,60655503 Alice Through the Looking Glass,2016,Fantasy,76846624 Shrek the Third,2007,Comedy,320706665 Warcraft,2016,Fantasy,46978995 Terminator Genisys,2015,Adventure,89732035 The Chronicles of Narnia: The Voyage of the Dawn Treader,2010,Adventure,104383624 Pearl Harbor,2001,War,198539855 Transformers,2007,Action,318759914 Alexander,2004,Biography,34293771 Harry Potter and the Order of the Phoenix,2007,Family,292000866 Harry Potter and the Goblet of Fire,2005,Family,289994397 Hancock,2008,Action,227946274 I Am Legend,2007,Sci-Fi,256386216 Charlie and the Chocolate Factory,2005,Adventure,206456431 Ratatouille,2007,Comedy,206435493 Batman Begins,2005,Adventure,205343774 Madagascar: Escape 2 Africa,2008,Comedy,179982968 Night at the Museum: Battle of the Smithsonian,2009,Comedy,177243721 X-Men Origins: Wolverine,2009,Thriller,179883016 The Matrix 
Revolutions,2003,Action,139259759 Frozen,2013,Adventure,400736600 The Matrix Reloaded,2003,Action,281492479 Thor: The Dark World,2013,Adventure,206360018 Mad Max: Fury Road,2015,Action,153629485 Angels & Demons,2009,Mystery,133375846 Thor,2011,Fantasy,181015141 Bolt,2008,Comedy,114053579 G-Force,2009,Fantasy,119420252 Wrath of the Titans,2012,Adventure,83640426 Dark Shadows,2012,Horror,79711678 Mission: Impossible - Rogue Nation,2015,Thriller,195000874 The Wolfman,2010,Drama,61937495 The Legend of Tarzan,2016,Adventure,124051759 Bee Movie,2007,Family,126597121 Kung Fu Panda 2,2011,Action,165230261 The Last Airbender,2010,Action,131564731 Mission: Impossible III,2006,Adventure,133382309 White House Down,2013,Thriller,73103784 Mars Needs Moms,2011,Family,21379315 Flushed Away,2006,Family,64459316 Pan,2015,Adventure,34964818 Mr. Peabody & Sherman,2014,Adventure,111505642 Troy,2004,Adventure,133228348 Madagascar 3: Europe's Most Wanted,2012,Family,216366733 Die Another Day,2002,Thriller,160201106 Ghostbusters,2016,Action,118099659 Armageddon,1998,Sci-Fi,201573391 Men in Black II,2002,Action,190418803 Beowulf,2007,Adventure,82161969 Kung Fu Panda 3,2016,Comedy,143523463 Mission: Impossible - Ghost Protocol,2011,Action,209364921 Rise of the Guardians,2012,Fantasy,103400692 Fun with Dick and Jane,2005,Comedy,110332737 The Last Samurai,2003,Action,111110575 Exodus: Gods and Kings,2014,Drama,65007045 Star Trek,2009,Sci-Fi,257704099 Spider-Man,2002,Romance,403706375 How to Train Your Dragon 2,2014,Action,176997107 Gods of Egypt,2016,Action,31141074 Stealth,2005,Adventure,31704416 Watchmen,2009,Mystery,107503316 Lethal Weapon 4,1998,Thriller,129734803 Hulk,2003,Sci-Fi,132122995 G.I. 
Joe: Retaliation,2013,Thriller,122512052 Sahara,2005,Comedy,68642452 Final Fantasy: The Spirits Within,2001,Animation,32131830 Captain America: The First Avenger,2011,Adventure,176636816 The World Is Not Enough,1999,Adventure,126930660 Master and Commander: The Far Side of the World,2003,Adventure,93926386 The Twilight Saga: Breaking Dawn - Part 2,2012,Drama,292298923 Happy Feet 2,2011,Musical,63992328 The Incredible Hulk,2008,Adventure,134518390 The BFG,2016,Family,52792307 The Revenant,2015,Drama,183635922 Turbo,2013,Animation,83024900 Rango,2011,Adventure,123207194 Penguins of Madagascar,2014,Animation,83348920 The Bourne Ultimatum,2007,Thriller,227137090 Kung Fu Panda,2008,Animation,215395021 Ant-Man,2015,Action,180191634 The Hunger Games: Catching Fire,2013,Thriller,424645577 The Twilight Saga: Breaking Dawn - Part 2,2012,Adventure,292298923 Home,2015,Sci-Fi,177343675 War of the Worlds,2005,Adventure,234277056 Bad Boys II,2003,Crime,138396624 Puss in Boots,2011,Family,149234747 Salt,2010,Crime,118311368 Noah,2014,Adventure,101160529 The Adventures of Tintin,2011,Action,77564037 Harry Potter and the Prisoner of Azkaban,2004,Adventure,249358727 Australia,2008,Romance,49551662 After Earth,2013,Action,60522097 Dinosaur,2000,Animation,137748063 Night at the Museum: Secret of the Tomb,2014,Fantasy,113733726 Megamind,2010,Sci-Fi,148337537 Harry Potter and the Sorcerer's Stone,2001,Adventure,317557891 R.I.P.D.,2013,Comedy,33592415 Pirates of the Caribbean: The Curse of the Black Pearl,2003,Adventure,305388685 The Hunger Games: Mockingjay - Part 1,2014,Thriller,337103873 The Da Vinci Code,2006,Thriller,217536138 Rio 2,2014,Comedy,131536019 X-Men 2,2003,Thriller,214948780 Fast Five,2011,Crime,209805005 Sherlock Holmes: A Game of Shadows,2011,Action,186830669 Clash of the Titans,2010,Fantasy,163192114 Total Recall,1990,Sci-Fi,119412921 The 13th Warrior,1999,Adventure,32694788 The Bourne Legacy,2012,Action,113165635 Batman & Robin,1997,Action,107285004 How the Grinch 
Stole Christmas,2000,Fantasy,260031035 The Day After Tomorrow,2004,Sci-Fi,186739919 Mission: Impossible II,2000,Thriller,215397307 The Perfect Storm,2000,Action,182618434 Fantastic 4: Rise of the Silver Surfer,2007,Sci-Fi,131920333 Life of Pi,2012,Adventure,124976634 Ghost Rider,2007,Fantasy,115802596 Jason Bourne,2016,Thriller,108521835 Charlie's Angels: Full Throttle,2003,Action,100685880 Prometheus,2012,Sci-Fi,126464904 Stuart Little 2,2002,Comedy,64736114 Elysium,2013,Thriller,93050117 The Chronicles of Riddick,2004,Sci-Fi,57637485 RoboCop,2014,Crime,58607007 Speed Racer,2008,Action,43929341 How Do You Know,2010,Comedy,30212620 Knight and Day,2010,Comedy,76418654 Oblivion,2013,Adventure,89021735 Star Wars: Episode III - Revenge of the Sith,2005,Sci-Fi,380262555 Star Wars: Episode II - Attack of the Clones,2002,Fantasy,310675583 "Monsters, Inc.",2001,Family,289907418 The Wolverine,2013,Thriller,132550960 Star Wars: Episode I - The Phantom Menace,1999,Adventure,474544677 The Croods,2013,Comedy,187165546 Windtalkers,2002,War,40911830 The Huntsman: Winter's War,2016,Drama,47952020 Teenage Mutant Ninja Turtles,2014,Action,190871240 Gravity,2013,Drama,274084951 Dante's Peak,1997,Thriller,67155742 Fantastic Four,2015,Action,56114221 Night at the Museum,2006,Fantasy,250863268 San Andreas,2015,Action,155181732 Tomorrow Never Dies,1997,Adventure,125332007 The Patriot,2000,Drama,113330342 Ocean's Twelve,2004,Thriller,125531634 Mr. & Mrs. 
Smith,2005,Comedy,186336103 Insurgent,2015,Adventure,129995817 The Aviator,2004,Biography,102608827 Gulliver's Travels,2010,Fantasy,42776259 The Green Hornet,2011,Comedy,98780042 300: Rise of an Empire,2014,Fantasy,106369117 The Smurfs,2011,Fantasy,142614158 Home on the Range,2004,Family,50026353 Allegiant,2016,Adventure,66002193 Real Steel,2011,Action,85463309 The Smurfs 2,2013,Fantasy,71017784 Speed 2: Cruise Control,1997,Romance,48068396 Ender's Game,2013,Action,61656849 Live Free or Die Hard,2007,Adventure,134520804 The Lord of the Rings: The Fellowship of the Ring,2001,Action,313837577 Around the World in 80 Days,2004,Action,24004159 Ali,2001,Sport,58183966 The Cat in the Hat,2003,Family,100446895 "I, Robot",2004,Action,144795350 Kingdom of Heaven,2005,History,47396698 Stuart Little,1999,Adventure,140015224 The Princess and the Frog,2009,Family,104374107 The Martian,2015,Drama,228430993 The Island,2005,Thriller,35799026 Town & Country,2001,Comedy,6712451 Gone in Sixty Seconds,2000,Crime,101643008 Gladiator,2000,Drama,187670866 Minority Report,2002,Thriller,132014112 Harry Potter and the Chamber of Secrets,2002,Family,261970615 Casino Royale,2006,Thriller,167007184 Planet of the Apes,2001,Sci-Fi,180011740 Terminator 2: Judgment Day,1991,Action,204843350 Public Enemies,2009,Romance,97030725 American Gangster,2007,Drama,130127620 True Lies,1994,Action,146282411 The Taking of Pelham 1 2 3,2009,Action,65452312 Little Fockers,2010,Romance,148383780 The Other Guys,2010,Action,119219978 Eraser,1996,Action,101228120 Django Unchained,2012,Drama,162804648 The Hunchback of Notre Dame,1996,Romance,100117603 The Emperor's New Groove,2000,Adventure,89296573 The Expendables 2,2012,Thriller,85017401 National Treasure,2004,Comedy,173005002 Eragon,2006,Action,75030163 Where the Wild Things Are,2009,Drama,77222184 Pan,2015,Family,34964818 Epic,2013,Adventure,107515297 The Tourist,2010,Thriller,67631157 End of Days,1999,Action,66862068 Blood Diamond,2006,Adventure,57366262 The 
Wolf of Wall Street,2013,Comedy,116866727 Batman Forever,1995,Adventure,184031112 Starship Troopers,1997,Sci-Fi,54700065 Cloud Atlas,2012,Sci-Fi,27098580 Legend of the Guardians: The Owls of Ga'Hoole,2010,Adventure,55673333 Catwoman,2004,Fantasy,40198710 Hercules,2014,Adventure,72660029 Treasure Planet,2002,Animation,38120554 Land of the Lost,2009,Adventure,49392095 The Expendables 3,2014,Action,39292022 Point Break,2015,Action,28772222 Son of the Mask,2005,Family,17010646 In the Heart of the Sea,2015,Action,24985612 The Adventures of Pluto Nash,2002,Sci-Fi,4411102 Green Zone,2010,Thriller,35024475 The Peanuts Movie,2015,Adventure,130174897 The Spanish Prisoner,1997,Mystery,10200000 The Mummy Returns,2001,Fantasy,202007640 Gangs of New York,2002,Drama,77679638 The Flowers of War,2011,Drama,9213 Surf's Up,2007,Comedy,58867694 The Stepford Wives,2004,Comedy,59475623 Black Hawk Down,2001,War,108638745 The Campaign,2012,Comedy,86897182 The Fifth Element,1997,Adventure,63540020 Sex and the City 2,2010,Comedy,95328937 The Road to El Dorado,2000,Comedy,50802661 Ice Age: Continental Drift,2012,Adventure,161317423 Cinderella,2015,Romance,201148159 The Lovely Bones,2009,Drama,43982842 Finding Nemo,2003,Comedy,380838870 The Lord of the Rings: The Return of the King,2003,Drama,377019252 The Lord of the Rings: The Two Towers,2002,Action,340478898 Seventh Son,2014,Adventure,17176900 Lara Croft: Tomb Raider,2001,Thriller,131144183 Transcendence,2014,Thriller,23014504 Jurassic Park III,2001,Thriller,181166115 Rise of the Planet of the Apes,2011,Action,176740650 The Spiderwick Chronicles,2008,Family,71148699 A Good Day to Die Hard,2013,Thriller,67344392 The Alamo,2004,Western,22406362 The Incredibles,2004,Adventure,261437578 Cutthroat Island,1995,Adventure,11000000 Percy Jackson & the Olympians: The Lightning Thief,2010,Family,88761720 Men in Black,1997,Family,250147615 Toy Story 2,1999,Comedy,245823397 Unstoppable,2010,Thriller,81557479 Rush Hour 2,2001,Comedy,226138454 What Lies 
Beneath,2000,Fantasy,155370362 Cloudy with a Chance of Meatballs,2009,Family,124870275 Ice Age: Dawn of the Dinosaurs,2009,Family,196573705 The Secret Life of Walter Mitty,2013,Fantasy,58229120 Charlie's Angels,2000,Action,125305545 The Departed,2006,Crime,132373442 Mulan,1998,Fantasy,120618403 Tropic Thunder,2008,Action,110416702 The Girl with the Dragon Tattoo,2011,Drama,102515793 Die Hard with a Vengeance,1995,Adventure,100012500 Sherlock Holmes,2009,Adventure,209019489 Atlantis: The Lost Empire,2001,Action,84037039 Alvin and the Chipmunks: The Road Chip,2015,Animation,85884815 Valkyrie,2008,History,83077470 You Don't Mess with the Zohan,2008,Comedy,100018837 Pixels,2015,Animation,78747585 A.I. Artificial Intelligence,2001,Drama,78616689 The Haunted Mansion,2003,Comedy,75817994 Contact,1997,Drama,100853835 Hollow Man,2000,Action,73209340 The Interpreter,2005,Crime,72515360 Percy Jackson: Sea of Monsters,2013,Fantasy,68558662 Lara Croft Tomb Raider: The Cradle of Life,2003,Fantasy,65653758 Now You See Me 2,2016,Comedy,64685359 The Saint,1997,Action,61355436 Spy Game,2001,Thriller,26871 Mission to Mars,2000,Thriller,60874615 Rio,2011,Adventure,143618384 Bicentennial Man,1999,Comedy,58220776 Volcano,1997,Action,47474112 The Devil's Own,1997,Thriller,42877165 K-19: The Widowmaker,2002,History,35168677 Fantastic Four,2015,Sci-Fi,56114221 Conan the Barbarian,1982,Fantasy,37567440 Cinderella Man,2005,Drama,61644321 The Nutcracker in 3D,2010,Fantasy,190562 Seabiscuit,2003,History,120147445 Twister,1996,Adventure,241688385 The Fast and the Furious,2001,Thriller,144512310 Cast Away,2000,Adventure,233630478 Happy Feet,2006,Music,197992827 The Bourne Supremacy,2004,Mystery,176049130 Air Force One,1997,Drama,172620724 Ocean's Eleven,2001,Crime,183405771 The Three Musketeers,2011,Romance,20315324 Hotel Transylvania,2012,Animation,148313048 Enchanted,2007,Animation,127706877 Safe House,2012,Thriller,126149655 102 Dalmatians,2000,Adventure,66941559 Tower 
Heist,2011,Action,78009155 The Holiday,2006,Romance,63224849 Enemy of the State,1998,Drama,111544445 It's Complicated,2009,Drama,112703470 Ocean's Thirteen,2007,Crime,117144465 Open Season,2006,Animation,84303558 Divergent,2014,Mystery,150832203 Enemy at the Gates,2001,War,51396781 The Rundown,2003,Action,47592825 Last Action Hero,1993,Comedy,50016394 Memoirs of a Geisha,2005,Drama,57010853 The Fast and the Furious: Tokyo Drift,2006,Action,62494975 Arthur Christmas,2011,Fantasy,46440491 Meet Joe Black,1998,Drama,44606335 Collateral Damage,2002,Drama,40048332 Mirror Mirror,2012,Adventure,64933670 Scott Pilgrim vs. the World,2010,Romance,31494270 The Core,2003,Action,31111260 Nutty Professor II: The Klumps,2000,Sci-Fi,123307945 Scooby-Doo,2002,Comedy,153288182 Dredd,2012,Action,13401683 Click,2006,Comedy,137340146 Cats & Dogs: The Revenge of Kitty Galore,2010,Action,43575716 Jumper,2008,Adventure,80170146 Hellboy II: The Golden Army,2008,Sci-Fi,75754670 Zodiac,2007,Mystery,33048353 The 6th Day,2000,Sci-Fi,34543701 Bruce Almighty,2003,Comedy,242589580 The Expendables,2010,Action,102981571 Mission: Impossible,1996,Adventure,180965237 The Hunger Games,2012,Sci-Fi,407999255 The Hangover Part II,2011,Comedy,254455986 Batman Returns,1992,Action,162831698 Over the Hedge,2006,Animation,155019340 Lilo & Stitch,2002,Family,145771527 Deep Impact,1998,Thriller,140459099 RED 2,2013,Crime,53215979 The Longest Yard,2005,Sport,158115031 Alvin and the Chipmunks: Chipwrecked,2011,Animation,133103929 Grown Ups 2,2013,Comedy,133668525 Get Smart,2008,Comedy,130313314 Something's Gotta Give,2003,Comedy,124590960 Shutter Island,2010,Mystery,127968405 Four Christmases,2008,Comedy,120136047 Robots,2005,Adventure,128200012 Face/Off,1997,Thriller,112225777 Bedtime Stories,2008,Romance,109993847 Road to Perdition,2002,Crime,104054514 Just Go with It,2011,Comedy,103028109 Con Air,1997,Action,101087161 Eagle Eye,2008,Action,101111837 Cold Mountain,2003,History,95632614 The Book of 
Eli,2010,Thriller,94822707 Flubber,1997,Sci-Fi,92969824 The Haunting,1999,Mystery,91188905 Space Jam,1996,Fantasy,90443603 The Pink Panther,2006,Comedy,82226474 The Day the Earth Stood Still,2008,Sci-Fi,79363785 Conspiracy Theory,1997,Thriller,76081498 Fury,2014,War,85707116 Six Days Seven Nights,1998,Comedy,74329966 Yogi Bear,2010,Family,100169068 Spirit: Stallion of the Cimarron,2002,Animation,73215310 Zookeeper,2011,Family,80360866 Lost in Space,1998,Action,69102910 The Manchurian Candidate,2004,Mystery,65948711 Hotel Transylvania 2,2015,Animation,169692572 Fantasia 2000,1999,Music,60507228 The Time Machine,2002,Adventure,56684819 Mighty Joe Young,1998,Thriller,50628009 Swordfish,2001,Action,69772969 The Legend of Zorro,2005,Action,45356386 What Dreams May Come,1998,Romance,55350897 Little Nicky,2000,Fantasy,39442871 The Brothers Grimm,2005,Adventure,37899638 Mars Attacks!,1996,Sci-Fi,37754208 Surrogates,2009,Sci-Fi,38542418 Thirteen Days,2000,History,34566746 Daylight,1996,Thriller,32885565 Walking with Dinosaurs 3D,2013,Animation,36073232 Battlefield Earth,2000,Adventure,21471685 Looney Tunes: Back in Action,2003,Family,20950820 Nine,2009,Romance,19673424 Timeline,2003,Adventure,19480739 The Postman,1997,Adventure,17593391 Babe: Pig in the City,1998,Fantasy,18318000 The Last Witch Hunter,2015,Fantasy,27356090 Red Planet,2000,Action,17473245 Arthur and the Invisibles,2006,Animation,15131330 Oceans,2009,Documentary,19406406 A Sound of Thunder,2005,Horror,1891821 Pompeii,2014,History,23219748 A Beautiful Mind,2001,Drama,170708996 The Lion King,1994,Animation,422783777 Journey 2: The Mysterious Island,2012,Adventure,103812241 Cloudy with a Chance of Meatballs 2,2013,Fantasy,119793567 Red Dragon,2002,Drama,92930005 Hidalgo,2004,Western,67286731 Jack and Jill,2011,Comedy,74158157 2 Fast 2 Furious,2003,Crime,127083765 The Little Prince,2015,Family,1339152 The Invasion,2007,Thriller,15071514 The Adventures of Rocky & Bullwinkle,2000,Family,26000610 The Secret Life of 
Pets,2016,Family,323505540 The League of Extraordinary Gentlemen,2003,Adventure,66462600 Despicable Me 2,2013,Sci-Fi,368049635 Independence Day,1996,Adventure,306124059 The Lost World: Jurassic Park,1997,Sci-Fi,229074524 Madagascar,2005,Comedy,193136719 Children of Men,2006,Thriller,35286428 X-Men,2000,Adventure,157299717 Wanted,2008,Action,134568845 The Rock,1996,Action,134006721 Ice Age: The Meltdown,2006,Action,195329763 50 First Dates,2004,Comedy,120776832 Hairspray,2007,Drama,118823091 Exorcist: The Beginning,2004,Mystery,41814863 Inspector Gadget,1999,Action,97360069 Now You See Me,2013,Thriller,117698894 Grown Ups,2010,Comedy,162001186 The Terminal,2004,Comedy,77032279 Hotel for Dogs,2009,Family,73023275 Vertical Limit,2000,Action,68473360 Charlie Wilson's War,2007,Comedy,66636385 Shark Tale,2004,Comedy,160762022 Dreamgirls,2006,Musical,103338338 Be Cool,2005,Crime,55808744 Munich,2005,Thriller,47379090 Tears of the Sun,2003,Action,43426961 Killers,2010,Comedy,47000485 The Man from U.N.C.L.E.,2015,Adventure,45434443 Spanglish,2004,Drama,42044321 Monster House,2006,Mystery,73661010 Bandits,2001,Comedy,41523271 First Knight,1995,Action,37600435 Anna and the King,1999,Drama,39251128 Immortals,2011,Drama,83503161 Hostage,2005,Action,34636443 Titan A.E.,2000,Adventure,22751979 Hollywood Homicide,2003,Thriller,30013346 Soldier,1998,Drama,14567883 Monkeybone,2001,Animation,5409517 Flight of the Phoenix,2004,Thriller,21009180 Unbreakable,2000,Drama,94999143 Minions,2015,Comedy,336029560 Sucker Punch,2011,Action,36381716 Snake Eyes,1998,Thriller,55585389 Sphere,1998,Drama,36976367 The Angry Birds Movie,2016,Comedy,107225164 Fool's Gold,2008,Adventure,70224196 Funny People,2009,Comedy,51814190 The Kingdom,2007,Thriller,47456450 Talladega Nights: The Ballad of Ricky Bobby,2006,Action,148213377 Dr. 
Dolittle 2,2001,Comedy,112950721 Braveheart,1995,History,75600000 Jarhead,2005,Action,62647540 The Simpsons Movie,2007,Comedy,183132370 The Majestic,2001,Drama,27796042 Driven,2001,Drama,32616869 Two Brothers,2004,Family,18947630 The Village,2004,Drama,114195633 Doctor Dolittle,1998,Comedy,144156464 Signs,2002,Sci-Fi,227965690 Shrek 2,2004,Comedy,436471036 Cars,2006,Comedy,244052771 Runaway Bride,1999,Romance,152149590 xXx,2002,Action,141204016 The SpongeBob Movie: Sponge Out of Water,2015,Family,162495848 Ransom,1996,Crime,136448821 Inglourious Basterds,2009,War,120523073 Hook,1991,Comedy,119654900 Hercules,2014,Adventure,72660029 Die Hard 2,1990,Action,117541000 S.W.A.T.,2003,Thriller,116643346 Vanilla Sky,2001,Thriller,100614858 Lady in the Water,2006,Mystery,42272747 AVP: Alien vs. Predator,2004,Thriller,80281096 Alvin and the Chipmunks: The Squeakquel,2009,Music,219613391 We Were Soldiers,2002,Action,78120196 Olympus Has Fallen,2013,Action,98895417 Star Trek: Insurrection,1998,Adventure,70117571 Battle Los Angeles,2011,Sci-Fi,83552429 Big Fish,2003,Drama,66257002 Wolf,1994,Horror,65012000 War Horse,2011,Drama,79883359 The Monuments Men,2014,War,78031620 The Abyss,1989,Thriller,54222000 Wall Street: Money Never Sleeps,2010,Drama,52474616 Dracula Untold,2014,Fantasy,55942830 The Siege,1998,Thriller,40932372 Stardust,2007,Romance,38345403 Seven Years in Tibet,1997,Drama,37901509 The Dilemma,2011,Drama,48430355 Bad Company,2002,Adventure,30157016 Doom,2005,Sci-Fi,28031250 I Spy,2002,Thriller,33105600 Underworld: Awakening,2012,Action,62321039 Rock of Ages,2012,Musical,38509342 Hart's War,2002,Drama,19076815 Killer Elite,2011,Thriller,25093607 Rollerball,2002,Sci-Fi,18990542 Ballistic: Ecks vs. 
Sever,2002,Crime,14294842 Hard Rain,1998,Drama,19819494 Osmosis Jones,2001,Adventure,13596911 Blackhat,2015,Action,7097125 Sky Captain and the World of Tomorrow,2004,Thriller,37760080 Basic Instinct 2,2006,Mystery,5851188 Escape Plan,2013,Crime,25121291 The Legend of Hercules,2014,Fantasy,18821279 The Sum of All Fears,2002,Drama,118471320 The Twilight Saga: Eclipse,2010,Fantasy,300523113 The Score,2001,Thriller,71069884 Despicable Me,2010,Family,251501645 Money Train,1995,Comedy,35324232 Ted 2,2015,Comedy,81257500 Agora,2009,History,617840 Mystery Men,1999,Fantasy,29655590 Hall Pass,2011,Comedy,45045037 The Insider,1999,Thriller,28965197 Body of Lies,2008,Drama,39380442 Abraham Lincoln: Vampire Hunter,2012,Horror,37516013 Entrapment,1999,Crime,87704396 The X Files,1998,Sci-Fi,83892374 The Last Legion,2007,Action,5932060 Saving Private Ryan,1998,Action,216119491 Need for Speed,2014,Crime,43568507 What Women Want,2000,Comedy,182805123 Ice Age,2002,Adventure,176387405 Dreamcatcher,2003,Drama,33685268 Lincoln,2012,War,182204440 The Matrix,1999,Action,171383253 Apollo 13,1995,Adventure,172071312 Total Recall,1990,Action,119412921 The Santa Clause 2,2002,Fantasy,139225854 Les Misérables,2012,Musical,148775460 You've Got Mail,1998,Romance,115731542 Step Brothers,2008,Comedy,100468793 The Mask of Zorro,1998,Adventure,93771072 Due Date,2010,Drama,100448498 Unbroken,2014,Sport,115603980 Space Cowboys,2000,Action,90454043 Cliffhanger,1993,Action,84049211 Broken Arrow,1996,Thriller,70450000 The Kid,2000,Family,69688384 World Trade Center,2006,History,70236496 Mona Lisa Smile,2003,Drama,63695760 The Dictator,2012,Romance,59617068 Eyes Wide Shut,1999,Mystery,55637680 Annie,2014,Comedy,85911262 Focus,2015,Crime,53846915 This Means War,2012,Comedy,54758461 Blade: Trinity,2004,Sci-Fi,52397389 Primary Colors,1998,Drama,38966057 Resident Evil: Retribution,2012,Action,42345531 Death Race,2008,Sci-Fi,36064910 The Long Kiss Goodnight,1996,Action,33328051 Proof of 
Life,2000,Drama,32598931 Zathura: A Space Adventure,2005,Adventure,28045540 Fight Club,1999,Drama,37023395 We Are Marshall,2006,Drama,43532294 Hudson Hawk,1991,Action,17218080 Lucky Numbers,2000,Crime,10014234 "I, Frankenstein",2014,Sci-Fi,19059018 Oliver Twist,2005,Drama,1987287 Elektra,2005,Action,24407944 Sin City: A Dame to Kill For,2014,Crime,13750556 Random Hearts,1999,Drama,31054924 Everest,2015,Biography,43247140 Perfume: The Story of a Murderer,2006,Fantasy,2208939 Austin Powers in Goldmember,2002,Comedy,213079163 Astro Boy,2009,Family,19548064 Jurassic Park,1993,Thriller,356784000 Wyatt Earp,1994,Biography,25052000 Clear and Present Danger,1994,Action,122012710 Dragon Blade,2015,Action,72413 Littleman,2006,Crime,58255287 U-571,2000,Action,77086030 The American President,1995,Comedy,65000000 The Love Guru,2008,Sport,32178777 3000 Miles to Graceland,2001,Comedy,15738632 The Hateful Eight,2015,Mystery,54116191 Blades of Glory,2007,Comedy,118153533 Hop,2011,Adventure,108012170 300,2006,Fantasy,210592590 Meet the Fockers,2004,Comedy,279167575 Marley & Me,2008,Comedy,143151473 The Green Mile,1999,Mystery,136801374 Chicken Little,2005,Animation,135381507 Gone Girl,2014,Mystery,167735396 The Bourne Identity,2002,Thriller,121468960 GoldenEye,1995,Adventure,106635996 The General's Daughter,1999,Thriller,102678089 The Truman Show,1998,Sci-Fi,125603360 The Prince of Egypt,1998,Fantasy,101217900 Daddy Day Care,2003,Comedy,104148781 2 Guns,2013,Comedy,75573300 Cats & Dogs,2001,Fantasy,93375151 The Italian Job,2003,Action,106126012 Two Weeks Notice,2002,Comedy,93307796 Antz,1998,Comedy,90646554 Couples Retreat,2009,Comedy,109176215 Days of Thunder,1990,Action,82670733 Cheaper by the Dozen 2,2005,Family,82569532 The Scorch Trials,2015,Sci-Fi,81687587 Eat Pray Love,2010,Drama,80574010 The Family Man,2000,Comedy,75764085 RED,2010,Action,90356857 Any Given Sunday,1999,Drama,75530832 The Horse Whisperer,1998,Romance,75370763 Collateral,2004,Thriller,100003492 The Scorpion 
King,2002,Action,90341670 Ladder 49,2004,Thriller,74540762 Jack Reacher,2012,Action,80033643 Deep Blue Sea,1999,Sci-Fi,73648142 This Is It,2009,Documentary,71844424 Contagion,2011,Thriller,75638743 Kangaroo Jack,2003,Comedy,66734992 Coraline,2009,Family,75280058 The Happening,2008,Thriller,64505912 Man on Fire,2004,Thriller,77862546 The Shaggy Dog,2006,Family,61112916 Starsky & Hutch,2004,Comedy,88200225 Jingle All the Way,1996,Family,60573641 Hellboy,2004,Sci-Fi,59035104 A Civil Action,1998,Drama,56702901 ParaNorman,2012,Family,55994557 The Jackal,1997,Crime,54910560 Paycheck,2003,Action,53789313 Up Close & Personal,1996,Romance,51045801 The Tale of Despereaux,2008,Animation,50818750 The Tuxedo,2002,Comedy,50189179 Under Siege 2: Dark Territory,1995,Action,50024083 Jack Ryan: Shadow Recruit,2014,Drama,50549107 Joy,2015,Comedy,56443482 London Has Fallen,2016,Drama,62401264 Alien: Resurrection,1997,Horror,47748610 Shooter,2007,Action,46975183 The Boxtrolls,2014,Family,50807639 Practical Magic,1998,Fantasy,46611204 The Lego Movie,2014,Adventure,257756197 Miss Congeniality 2: Armed and Fabulous,2005,Crime,48472213 Reign of Fire,2002,Action,43060566 Gangster Squad,2013,Drama,45996718 Year One,2009,Adventure,43337279 Invictus,2009,Drama,37479778 Duplicity,2009,Romance,40559930 My Favorite Martian,1999,Comedy,36830057 The Sentinel,2006,Thriller,36279230 Planet 51,2009,Adventure,42194060 Star Trek: Nemesis,2002,Sci-Fi,43119879 Intolerable Cruelty,2003,Romance,35096190 Edge of Darkness,2010,Mystery,43290977 The Relic,1997,Sci-Fi,33927476 Analyze That,2002,Comedy,32122249 Righteous Kill,2008,Action,40076438 Mercury Rising,1998,Action,32940507 The Soloist,2009,Biography,31670931 The Legend of Bagger Vance,2000,Fantasy,30695227 Almost Famous,2000,Music,32522352 xXx: State of the Union,2005,Crime,26082914 Priest,2011,Thriller,29136626 Sinbad: Legend of the Seven Seas,2003,Adventure,26288320 Event Horizon,1997,Horror,26616590 The Avengers,2012,Sci-Fi,623279547 
Dragonfly,2002,Fantasy,30063805 The Black Dahlia,2006,Crime,22518325 Flyboys,2006,Adventure,13082288 The Last Castle,2001,Thriller,18208078 Supernova,2000,Thriller,14218868 Winter's Tale,2014,Drama,22451 The Mortal Instruments: City of Bones,2013,Mystery,31165421 Meet Dave,2008,Romance,11802056 Dark Water,2005,Horror,25472967 Edtv,1999,Drama,22362500 Inkheart,2008,Fantasy,17281832 The Spirit,2008,Crime,19781879 Mortdecai,2015,Mystery,7605668 In the Name of the King: A Dungeon Siege Tale,2007,Action,4535117 Beyond Borders,2003,Romance,4426297 The Great Raid,2005,Drama,10166502 Deadpool,2016,Adventure,363024263 Holy Man,1998,Drama,12065985 American Sniper,2014,Biography,350123553 Goosebumps,2015,Adventure,80021740 Just Like Heaven,2005,Romance,48291624 The Flintstones in Viva Rock Vegas,2000,Sci-Fi,35231365 Rambo III,1988,Action,53715611 Leatherheads,2008,Sport,31199215 Did You Hear About the Morgans?,2009,Comedy,29580087 The Internship,2013,Comedy,44665963 Resident Evil: Afterlife,2010,Action,60128566 Red Tails,2012,History,49875589 The Devil's Advocate,1997,Mystery,60984028 That's My Boy,2012,Comedy,36931089 DragonHeart,1996,Action,51317350 After the Sunset,2004,Drama,28328132 Ghost Rider: Spirit of Vengeance,2011,Thriller,51774002 Captain Corelli's Mandolin,2001,War,25528495 The Pacifier,2005,Family,113006880 Walking Tall,2004,Crime,45860039 Forrest Gump,1994,Comedy,329691196 Alvin and the Chipmunks,2007,Family,217326336 Meet the Parents,2000,Comedy,166225040 Pocahontas,1995,Romance,141600000 Superman,1978,Action,134218018 The Nutty Professor,1996,Comedy,128769345 Hitch,2005,Comedy,177575142 George of the Jungle,1997,Action,105263257 American Wedding,2003,Romance,104354205 Captain Phillips,2013,Thriller,107100855 Date Night,2010,Romance,98711404 Casper,1995,Comedy,100328194 The Equalizer,2014,Action,101530738 Maid in Manhattan,2002,Drama,93815117 Crimson Tide,1995,Drama,91400000 The Pursuit of Happyness,2006,Drama,162586036 Flightplan,2005,Drama,89706988 
Disclosure,1994,Thriller,83000000 City of Angels,1998,Romance,78745923 Kill Bill: Vol. 1,2003,Action,70098138 Bowfinger,1999,Comedy,66365290 Kill Bill: Vol. 2,2004,Crime,66207920 Tango & Cash,1989,Thriller,63408614 Death Becomes Her,1992,Fantasy,58422650 Shanghai Noon,2000,Adventure,56932305 Executive Decision,1996,Adventure,68750000 Mr. Popper's Penguins,2011,Family,68218041 The Forbidden Kingdom,2008,Fantasy,25040293 Free Birds,2013,Animation,55747724 Alien 3,1992,Sci-Fi,55473600 Evita,1996,Biography,49994804 Ronin,1998,Thriller,41609593 The Ghost and the Darkness,1996,Adventure,38553833 Paddington,2014,Fantasy,76137505 The Watch,2012,Sci-Fi,34350553 The Hunted,2003,Drama,34238611 Instinct,1999,Thriller,34098563 Stuck on You,2003,Comedy,33828318 Semi-Pro,2008,Sport,33472850 The Pirates! Band of Misfits,2012,Animation,31051126 Changeling,2008,Mystery,35707327 Chain Reaction,1996,Action,20550712 The Fan,1996,Drama,18573791 The Phantom of the Opera,2004,Musical,51225796 Elizabeth: The Golden Age,2007,Drama,16264475 Æon Flux,2005,Sci-Fi,25857987 Gods and Generals,2003,History,12870569 Turbulence,1997,Thriller,11466088 Imagine That,2009,Family,16088610 Muppets Most Wanted,2014,Family,51178893 Thunderbirds,2004,Sci-Fi,6768055 Burlesque,2010,Music,39440655 A Very Long Engagement,2004,Romance,6167817 Blade II,2002,Action,81645152 Seven Pounds,2008,Drama,69951824 Bullet to the Head,2012,Action,9483821 The Godfather: Part III,1990,Drama,66676062 Elizabethtown,2005,Comedy,26838389 "You, Me and Dupree",2006,Comedy,75604320 Superman II,1980,Romance,108200000 Gigli,2003,Comedy,5660084 All the King's Men,2006,Drama,7221458 Shaft,2000,Thriller,70327868 Anastasia,1997,Fantasy,58297830 Moulin Rouge!,2001,Musical,57386369 Domestic Disturbance,2001,Thriller,45207112 Black Mass,2015,Crime,62563543 Flags of Our Fathers,2006,Drama,33574332 Law Abiding Citizen,2009,Crime,73343413 Grindhouse,2007,Horror,25031037 Beloved,1998,Drama,22843047 Lucky You,2007,Drama,5755286 Catch Me If You 
Can,2002,Biography,164435221 Zero Dark Thirty,2012,Drama,95720716 The Break-Up,2006,Drama,118683135 Mamma Mia!,2008,Musical,143704210 Valentine's Day,2010,Comedy,110476776 The Dukes of Hazzard,2005,Action,80270227 The Thin Red Line,1998,Drama,36385763 The Change-Up,2011,Fantasy,37035845 Man on the Moon,1999,Drama,34580635 Casino,1995,Biography,42438300 From Paris with Love,2010,Thriller,23324666 Bulletproof Monk,2003,Action,23020488 "Me, Myself & Irene",2000,Comedy,90567722 Barnyard,2006,Animation,72601713 The Twilight Saga: New Moon,2009,Fantasy,296623634 Shrek,2001,Adventure,267652016 The Adjustment Bureau,2011,Romance,62453315 Robin Hood: Prince of Thieves,1991,Romance,165500000 Jerry Maguire,1996,Sport,153620822 Ted,2012,Fantasy,218628680 As Good as It Gets,1997,Comedy,147637474 Patch Adams,1998,Drama,135014968 Anchorman 2: The Legend Continues,2013,Comedy,2175312 Mr. Deeds,2002,Comedy,126203320 Super 8,2011,Sci-Fi,126975169 Erin Brockovich,2000,Drama,125548685 How to Lose a Guy in 10 Days,2003,Romance,105807520 22 Jump Street,2014,Crime,191616238 Interview with the Vampire: The Vampire Chronicles,1994,Horror,105264608 Yes Man,2008,Comedy,97680195 Central Intelligence,2016,Comedy,126088877 Stepmom,1998,Comedy,91030827 Daddy's Home,2015,Family,150315155 Into the Woods,2014,Adventure,127997349 Inside Man,2006,Mystery,88504640 Payback,1999,Drama,81517441 Congo,1995,Mystery,81022333 Knowing,2009,Thriller,79948113 Failure to Launch,2006,Comedy,88658172 "Crazy, Stupid, Love.",2011,Romance,84244877 Garfield,2004,Comedy,75367693 Christmas with the Kranks,2004,Family,73701902 Moneyball,2011,Biography,75605492 Outbreak,1995,Thriller,67823573 Non-Stop,2014,Mystery,91439400 Race to Witch Mountain,2009,Thriller,67128202 V for Vendetta,2005,Action,70496802 Shanghai Knights,2003,Action,60470220 Curious George,2006,Adventure,58336565 Herbie Fully Loaded,2005,Sport,66002004 Don't Say a Word,2001,Crime,54997476 Hansel & Gretel: Witch Hunters,2013,Horror,55682070 
Unfaithful,2002,Thriller,52752475 I Am Number Four,2011,Action,55092830 Syriana,2005,Drama,50815288 13 Hours,2016,Drama,52822418 The Book of Life,2014,Family,50150619 Firewall,2006,Crime,48745150 Absolute Power,1997,Thriller,50007168 G.I. Jane,1997,Action,48154732 The Game,1997,Thriller,48265581 Silent Hill,2006,Mystery,46982632 The Replacements,2000,Comedy,44737059 American Reunion,2012,Comedy,56724080 The Negotiator,1998,Mystery,44484065 Into the Storm,2014,Action,47553512 Beverly Hills Cop III,1994,Thriller,42610000 Gremlins 2: The New Batch,1990,Horror,41482207 The Judge,2014,Crime,47105085 The Peacemaker,1997,Thriller,41256277 Resident Evil: Apocalypse,2004,Sci-Fi,50740078 Bridget Jones: The Edge of Reason,2004,Comedy,40203020 Out of Time,2003,Thriller,40905277 On Deadly Ground,1994,Thriller,38590500 The Adventures of Sharkboy and Lavagirl 3-D,2005,Adventure,39177541 The Beach,2000,Drama,39778599 Raising Helen,2004,Drama,37486138 Ninja Assassin,2009,Action,38105077 For Love of the Game,1999,Sport,35168395 Striptease,1996,Thriller,32800000 Marmaduke,2010,Comedy,33643461 Hereafter,2010,Drama,32741596 Murder by Numbers,2002,Crime,31874869 Assassins,1995,Crime,30306268 Hannibal Rising,2007,Drama,27667947 The Story of Us,1999,Romance,27067160 The Host,2013,Action,26616999 Basic,2003,Thriller,26536120 Blood Work,2002,Drama,26199517 The International,2009,Drama,25450527 Escape from L.A.,1996,Adventure,25407250 The Iron Giant,1999,Comedy,23159305 The Life Aquatic with Steve Zissou,2004,Drama,24006726 Free State of Jones,2016,Biography,20389967 The Life of David Gale,2003,Thriller,19593740 Man of the House,2005,Comedy,19118247 Run All Night,2015,Action,26442251 Eastern Promises,2007,Mystery,17114882 Into the Blue,2005,Thriller,18472363 The Messenger: The Story of Joan of Arc,1999,History,14131298 Your Highness,2011,Fantasy,21557240 Dream House,2011,Drama,21283440 Mad City,1997,Drama,10556196 Baby's Day Out,1994,Crime,16671505 The Scarlet Letter,1995,Romance,10400000 
Fair Game,2010,Biography,9528092 Domino,2005,Action,10137232 Jade,1995,Drama,9795017 Gamer,2009,Thriller,20488579 Beautiful Creatures,2013,Romance,19445217 Death to Smoochy,2002,Comedy,8355815 Zoolander 2,2016,Comedy,28837115 The Big Bounce,2004,Comedy,6471394 What Planet Are You From?,2000,Sci-Fi,6291602 Drive Angry,2011,Thriller,10706786 Street Fighter: The Legend of Chun-Li,2009,Crime,8742261 The One,2001,Action,43905746 The Adventures of Ford Fairlane,1990,Action,21413502 Traffic,2000,Thriller,124107476 Indiana Jones and the Last Crusade,1989,Action,197171806 Chappie,2015,Action,31569268 The Bone Collector,1999,Mystery,66488090 Panic Room,2002,Drama,95308367 Three Kings,1999,Adventure,60652036 Child 44,2015,Thriller,1206135 Rat Race,2001,Adventure,56607223 K-PAX,2001,Drama,50173190 Kate & Leopold,2001,Comedy,47095453 Bedazzled,2000,Romance,37879996 The Cotton Club,1984,Drama,25900000 3:10 to Yuma,2007,Adventure,53574088 Taken 3,2014,Action,89253340 Out of Sight,1998,Thriller,37339525 The Cable Guy,1996,Comedy,60154431 Dick Tracy,1990,Crime,103738726 The Thomas Crown Affair,1999,Crime,69304264 Riding in Cars with Boys,2001,Comedy,29781453 Happily N'Ever After,2006,Adventure,15519841 Mary Reilly,1996,Drama,5600000 My Best Friend's Wedding,1997,Comedy,126805112 America's Sweethearts,2001,Romance,93607673 Insomnia,2002,Thriller,67263182 Star Trek: First Contact,1996,Sci-Fi,92001027 Jonah Hex,2010,Fantasy,10539414 Courage Under Fire,1996,Action,58918501 Liar Liar,1997,Comedy,181395380 The Flintstones,1994,Comedy,130512915 Taken 2,2012,Thriller,139852971 Scary Movie 3,2003,Comedy,110000082 Miss Congeniality,2000,Romance,106807667 Journey to the Center of the Earth,2008,Adventure,101702060 The Princess Diaries 2: Royal Engagement,2004,Family,95149435 The Pelican Brief,1993,Mystery,100768056 The Client,1994,Drama,92115211 The Bucket List,2007,Drama,93452056 Patriot Games,1992,Thriller,83287363 Monster-in-Law,2005,Romance,82931301 Prisoners,2013,Mystery,60962878 
Training Day,2001,Thriller,76261036 Galaxy Quest,1999,Sci-Fi,71423726 Scary Movie 2,2001,Comedy,71277420 The Muppets,2011,Musical,88625922 Blade,1998,Horror,70001065 Coach Carter,2005,Drama,67253092 Changing Lanes,2002,Drama,66790248 Anaconda,1997,Adventure,65557989 Coyote Ugly,2000,Drama,60786269 Love Actually,2003,Drama,59365105 A Bug's Life,1998,Fantasy,162792677 From Hell,2001,Thriller,31598308 The Specialist,1994,Crime,57362581 Tin Cup,1996,Comedy,53854588 Kicking & Screaming,2005,Romance,52580895 The Hitchhiker's Guide to the Galaxy,2005,Adventure,51019112 Fat Albert,2004,Romance,48114556 Resident Evil: Extinction,2007,Horror,50648679 Blended,2014,Comedy,46280507 Last Holiday,2006,Adventure,38360195 The River Wild,1994,Crime,46815748 The Indian in the Cupboard,1995,Drama,35617599 Savages,2012,Drama,47307550 Cellular,2004,Crime,32003620 Johnny English,2003,Adventure,27972410 The Ant Bully,2006,Family,28133159 Dune,1984,Adventure,27400000 Across the Universe,2007,Drama,24343673 Revolutionary Road,2008,Drama,22877808 16 Blocks,2006,Drama,36883539 Babylon A.D.,2008,Sci-Fi,22531698 The Glimmer Man,1996,Comedy,20400913 Multiplicity,1996,Sci-Fi,20101861 Aliens in the Attic,2009,Sci-Fi,25200412 The Pledge,2001,Mystery,19719930 The Producers,2005,Musical,19377727 Dredd,2012,Action,13401683 The Phantom,1996,Comedy,17300889 All the Pretty Horses,2000,Western,15527125 Nixon,1995,Drama,13560960 The Ghost Writer,2010,Mystery,15523168 Deep Rising,1998,Horror,11146409 Miracle at St. 
Anna,2008,War,7916887 Curse of the Golden Flower,2006,Drama,6565495 Bangkok Dangerous,2008,Crime,15279680 Big Trouble,2002,Crime,7262288 Love in the Time of Cholera,2007,Romance,4584886 Shadow Conspiracy,1997,Thriller,2154540 Johnny English Reborn,2011,Crime,8129455 Argo,2012,Biography,136019448 The Fugitive,1993,Thriller,183875760 The Bounty Hunter,2010,Action,67061228 Sleepers,1996,Crime,53300852 Rambo: First Blood Part II,1985,Action,150415432 The Juror,1996,Thriller,44834712 Pinocchio,1940,Fantasy,84300000 Heaven's Gate,1980,Western,1500000 Underworld: Evolution,2006,Fantasy,62318875 Victor Frankenstein,2015,Thriller,5773519 Finding Forrester,2000,Drama,51768623 28 Days,2000,Comedy,37035515 Unleashed,2005,Drama,24520892 The Sweetest Thing,2002,Romance,24430272 The Firm,1993,Thriller,158348400 Charlie St. Cloud,2010,Fantasy,31136950 The Mechanic,2011,Crime,29113588 21 Jump Street,2012,Action,138447667 Notting Hill,1999,Drama,116006080 Chicken Run,2000,Animation,106793915 Along Came Polly,2004,Comedy,87856565 Boomerang,1992,Drama,70100000 The Heat,2013,Crime,159578352 Cleopatra,1963,Drama,57750000 Here Comes the Boom,2012,Sport,45290318 High Crimes,2002,Mystery,41543207 The Mirror Has Two Faces,1996,Drama,41252428 The Mothman Prophecies,2002,Horror,35228696 Brüno,2009,Comedy,59992760 Licence to Kill,1989,Thriller,34667015 Red Riding Hood,2011,Horror,37652565 15 Minutes,2001,Crime,24375436 Super Mario Bros.,1993,Fantasy,20915465 Lord of War,2005,Thriller,24127895 Hero,2002,Adventure,84961 One for the Money,2012,Comedy,26404753 The Interview,2014,Comedy,6105175 The Warrior's Way,2010,Action,5664251 Micmacs,2009,Action,1260917 8 Mile,2002,Music,116724075 A Knight's Tale,2001,Action,56083966 The Medallion,2003,Action,22108977 The Sixth Sense,1999,Mystery,293501675 Man on a Ledge,2012,Thriller,18600911 The Big Year,2011,Comedy,7204138 The Karate Kid,1984,Action,90800000 American Hustle,2013,Crime,150117807 The Proposal,2009,Drama,163947053 Double 
Jeopardy,1999,Crime,116735231 Back to the Future Part II,1989,Sci-Fi,118500000 Lucy,2014,Thriller,126546825 Fifty Shades of Grey,2015,Drama,166147885 Spy Kids 3-D: Game Over,2003,Family,111760631 A Time to Kill,1996,Drama,108706165 Cheaper by the Dozen,2003,Comedy,138614544 Lone Survivor,2013,Action,125069696 A League of Their Own,1992,Drama,107458785 The Conjuring 2,2016,Mystery,102310175 The Social Network,2010,Drama,96917897 He's Just Not That Into You,2009,Drama,93952276 Scary Movie 4,2006,Comedy,90703745 Scream 3,2000,Horror,89138076 Back to the Future Part III,1990,Western,87666629 Get Hard,2015,Comedy,90353764 Bram Stoker's Dracula,1992,Horror,82522790 Julie & Julia,2009,Biography,94125426 42,2013,Drama,95001343 The Talented Mr. Ripley,1999,Thriller,81292135 Dumb and Dumber To,2014,Comedy,86208010 Eight Below,2006,Adventure,81593527 The Intern,2015,Drama,75274748 Ride Along 2,2016,Comedy,90835030 The Last of the Mohicans,1992,Drama,72455275 Ray,2004,Drama,75305995 Sin City,2005,Crime,74098862 Vantage Point,2008,Thriller,72266306 "I Love You, Man",2009,Romance,71347010 Shallow Hal,2001,Romance,70836296 JFK,1991,History,70405498 Big Momma's House 2,2006,Comedy,70163652 The Mexican,2001,Adventure,66808615 Unbroken,2014,War,115603980 17 Again,2009,Fantasy,64149837 The Other Woman,2014,Comedy,83906114 The Final Destination,2009,Horror,66466372 Bridge of Spies,2015,Thriller,72306065 Behind Enemy Lines,2001,Drama,59068786 Shall We Dance,2004,Romance,57887882 Small Soldiers,1998,Comedy,53955614 Spawn,1997,Action,54967359 The Count of Monte Cristo,2002,Adventure,54228104 The Lincoln Lawyer,2011,Drama,57981889 Unknown,2011,Action,61094903 The Prestige,2006,Mystery,53082743 Horrible Bosses 2,2014,Comedy,54414716 Escape from Planet Earth,2013,Adventure,57011847 Apocalypto,2006,Thriller,50859889 The Living Daylights,1987,Action,51185897 Predators,2010,Action,52000688 Legal Eagles,1986,Romance,49851591 Secret Window,2004,Mystery,47781388 The Lake House,2006,Drama,52320979 
The Skeleton Key,2005,Thriller,47806295 The Odd Life of Timothy Green,2012,Comedy,51853450 Made of Honor,2008,Romance,46012734 Jersey Boys,2014,Music,47034272 The Rainmaker,1997,Drama,45856732 Gothika,2003,Thriller,59588068 Amistad,1997,History,44175394 Medicine Man,1992,Romance,45500797 Aliens vs. Predator: Requiem,2007,Horror,41797066 Ri¢hie Ri¢h,1994,Family,38087756 Autumn in New York,2000,Romance,37752931 Paul,2011,Comedy,37371385 The Guilt Trip,2012,Comedy,37101011 Scream 4,2011,Mystery,38176892 8MM,1999,Mystery,36283504 The Doors,1991,Music,35183792 Sex Tape,2014,Comedy,38543473 Hanging Up,2000,Drama,36037909 Final Destination 5,2011,Horror,42575718 Mickey Blue Eyes,1999,Romance,33864342 Pay It Forward,2000,Drama,33508922 Fever Pitch,2005,Sport,42071069 Drillbit Taylor,2008,Comedy,32853640 A Million Ways to Die in the West,2014,Western,42615685 The Shadow,1994,Adventure,32055248 Extremely Loud & Incredibly Close,2011,Mystery,31836745 Morning Glory,2010,Drama,30993544 Get Rich or Die Tryin',2005,Biography,30981850 The Art of War,2000,Adventure,30199105 Rent,2005,Drama,29077547 Bless the Child,2000,Drama,29374178 The Out-of-Towners,1999,Comedy,28535768 The Island of Dr. Moreau,1996,Sci-Fi,27663982 The Musketeer,2001,Action,27053815 The Other Boleyn Girl,2008,Drama,26814957 Sweet November,2001,Drama,25178165 The Reaping,2007,Thriller,25117498 Mean Streets,1973,Drama,32645 Renaissance Man,1994,Comedy,24332324 Colombiana,2011,Crime,36665854 The Magic Sword: Quest for Camelot,1998,Family,22717758 City by the Sea,2002,Thriller,22433915 At First Sight,1999,Drama,22326247 Torque,2004,Comedy,21176322 City Hall,1996,Drama,20300000 Marie Antoinette,2006,Drama,15962471 Kiss of Death,1995,Thriller,14942422 Get Carter,2000,Drama,14967182 The Impossible,2012,Thriller,18996755 Ishtar,1987,Action,14375181 Fantastic Mr. 
Fox,2009,Crime,20999103 Life or Something Like It,2002,Romance,14448589 Memoirs of an Invisible Man,1992,Comedy,14358033 Amélie,2001,Comedy,33201661 New York Minute,2004,Comedy,14018364 Alfie,2004,Romance,13395939 Big Miracle,2012,Romance,20113965 The Deep End of the Ocean,1999,Drama,13376506 Feardotcom,2002,Thriller,13208023 Cirque du Freak: The Vampire's Assistant,2009,Fantasy,13838130 Victor Frankenstein,2015,Horror,5773519 Duplex,2003,Comedy,9652000 Raise the Titanic,1980,Adventure,7000000 Universal Soldier: The Return,1999,Action,10431220 Pandorum,2009,Action,10326062 Impostor,2001,Mystery,6114237 Extreme Ops,2002,Thriller,4835968 Just Visiting,2001,Fantasy,4777007 Sunshine,2007,Thriller,3675072 A Thousand Words,2012,Drama,18438149 Delgo,2008,Adventure,511920 The Gunman,2015,Action,10640645 Alex Rider: Operation Stormbreaker,2006,Adventure,652526 Disturbia,2007,Drama,80050171 Hackers,1995,Thriller,7564000 The Hunting Party,2007,Thriller,876671 The Hudsucker Proxy,1994,Fantasy,2869369 The Warlords,2007,History,128978 Nomad: The Warrior,2005,War,77231 Snowpiercer,2013,Thriller,4563029 The Crow,1994,Fantasy,50693162 The Time Traveler's Wife,2009,Fantasy,63411478 The Fast and the Furious,2001,Crime,144512310 Frankenweenie,2012,Horror,35287788 Serenity,2005,Thriller,25335935 Against the Ropes,2004,Romance,5881504 Superman III,1983,Sci-Fi,60000000 Grudge Match,2013,Comedy,29802761 Red Cliff,2008,History,626809 Sweet Home Alabama,2002,Romance,127214072 The Ugly Truth,2009,Romance,88915214 Sgt. 
Bilko,1996,Comedy,30400000 Spy Kids 2: Island of Lost Dreams,2002,Action,85570368 Star Trek: Generations,1994,Thriller,75668868 The Grandmaster,2013,Drama,6594136 Water for Elephants,2011,Romance,58700247 The Hurricane,1999,Drama,50668906 Enough,2002,Crime,39177215 Heartbreakers,2001,Crime,40334024 Paul Blart: Mall Cop 2,2015,Action,71038190 Angel Eyes,2001,Drama,24044532 Joe Somebody,2001,Comedy,22770864 The Ninth Gate,1999,Thriller,18653746 Extreme Measures,1996,Thriller,17305211 Rock Star,2001,Drama,16991902 Precious,2009,Drama,47536959 White Squall,1996,Adventure,10300000 The Thing,1982,Mystery,13782838 Riddick,2013,Action,41997790 Switchback,1997,Mystery,6482195 Texas Rangers,2001,Action,623374 City of Ember,2008,Family,7871693 The Master,2012,Drama,16377274 The Express,2008,Drama,9589875 The 5th Wave,2016,Thriller,34912982 Creed,2015,Sport,109712885 The Town,2010,Thriller,92173235 What to Expect When You're Expecting,2012,Comedy,41102171 Burn After Reading,2008,Drama,60338891 Nim's Island,2008,Adventure,48006503 Rush,2013,Action,26903709 Magnolia,1999,Drama,22450975 Cop Out,2010,Crime,44867349 How to Be Single,2016,Romance,46813366 Dolphin Tale,2011,Drama,72279690 Twilight,2008,Romance,191449475 John Q,2002,Thriller,71026631 Blue Streak,1999,Thriller,68208190 We're the Millers,2013,Comedy,150368971 Breakdown,1997,Thriller,50129186 Never Say Never Again,1983,Action,55500000 Hot Tub Time Machine,2010,Sci-Fi,50213619 Dolphin Tale 2,2014,Family,42019483 Reindeer Games,2000,Family,23360779 A Man Apart,2003,Action,26183197 Aloha,2015,Drama,20991497 Ghosts of Mississippi,1996,Drama,13052741 Snow Falling on Cedars,1999,Drama,14378353 The Rite,2011,Mystery,33037754 Gattaca,1997,Drama,12339633 Isn't She Great,2000,Biography,2954405 Space Chimps,2008,Animation,30105968 Head of State,2003,Comedy,37788228 The Hangover,2009,Comedy,277313371 Ip Man 3,2015,History,2126511 Austin Powers: The Spy Who Shagged Me,1999,Comedy,205399422 Batman,1989,Action,251188924 There Be 
Dragons,2011,War,1068392 Lethal Weapon 3,1992,Crime,144731527 The Blind Side,2009,Biography,255950375 Spy Kids,2001,Adventure,112692062 Horrible Bosses,2011,Crime,117528646 True Grit,2010,Adventure,171031347 The Devil Wears Prada,2006,Comedy,124732962 Star Trek: The Motion Picture,1979,Mystery,82300000 Identity Thief,2013,Comedy,134455175 Cape Fear,1991,Thriller,79100000 21,2008,Thriller,81159365 Trainwreck,2015,Romance,110008260 Guess Who,2005,Comedy,67962333 The English Patient,1996,War,78651430 L.A. Confidential,1997,Crime,64604977 Sky High,2005,Comedy,63939454 In & Out,1997,Comedy,63826569 Species,1995,Thriller,60054449 A Nightmare on Elm Street,1984,Horror,26505000 The Cell,2000,Horror,61280963 The Man in the Iron Mask,1998,Action,56876365 Secretariat,2010,Sport,59699513 TMNT,2007,Comedy,54132596 Radio,2003,Sport,52277485 Friends with Benefits,2011,Comedy,55802754 Neighbors 2: Sorority Rising,2016,Comedy,55291815 Saving Mr. Banks,2013,History,83299761 Malcolm X,1992,History,48169908 This Is 40,2012,Comedy,67523385 Old Dogs,2009,Comedy,49474048 Underworld: Rise of the Lycans,2009,Fantasy,45802315 License to Wed,2007,Comedy,43792641 The Benchwarmers,2006,Sport,57651794 Must Love Dogs,2005,Romance,43894863 Donnie Brasco,1997,Crime,41954997 Resident Evil,2002,Horror,39532308 Poltergeist,1982,Fantasy,76600000 The Ladykillers,2004,Comedy,39692139 Max Payne,2008,Crime,40687294 In Time,2011,Thriller,37553932 The Back-up Plan,2010,Comedy,37481242 Something Borrowed,2011,Comedy,39026186 Black Knight,2001,Adventure,33422806 Street Fighter,1994,Action,33423521 The Pianist,2002,War,32519322 From Hell,2001,Thriller,31598308 The Nativity Story,2006,Drama,37617947 House of Wax,2005,Horror,32048809 Closer,2004,Drama,33987757 J. 
Edgar,2011,Drama,37304950 Mirrors,2008,Horror,30691439 Queen of the Damned,2002,Horror,30307804 Predator 2,1990,Sci-Fi,30669413 Untraceable,2008,Crime,28687835 Blast from the Past,1999,Comedy,26494611 Jersey Girl,2004,Comedy,25266129 Alex Cross,2012,Thriller,25863915 Midnight in the Garden of Good and Evil,1997,Mystery,25078937 Nanny McPhee Returns,2010,Fantasy,28995450 Hoffa,1992,Biography,24276500 The X Files: I Want to Believe,2008,Drama,20981633 Ella Enchanted,2004,Fantasy,22913677 Concussion,2015,Drama,34531832 Abduction,2011,Thriller,28064226 Valiant,2005,Adventure,19447478 Wonder Boys,2000,Drama,19389454 Superhero Movie,2008,Sci-Fi,25871834 Broken City,2013,Thriller,19692608 Cursed,2005,Comedy,19294901 Premium Rush,2012,Action,20275446 Hot Pursuit,2015,Comedy,34507079 The Four Feathers,2002,Romance,18306166 Parker,2013,Action,17609982 Wimbledon,2004,Romance,16831505 Furry Vengeance,2010,Family,17596256 Lions for Lambs,2007,Thriller,14998070 Flight of the Intruder,1991,Action,14587732 Walk Hard: The Dewey Cox Story,2007,Comedy,18317151 The Shipping News,2001,Drama,11405825 American Outlaws,2001,Action,13264986 The Young Victoria,2009,History,10991381 Whiteout,2009,Action,10268846 The Tree of Life,2011,Drama,13303319 Knock Off,1998,Action,10076136 Sabotage,2014,Action,10499968 The Order,2003,Mystery,7659747 Punisher: War Zone,2008,Action,7948159 Zoom,2006,Family,11631245 The Walk,2015,Biography,10137502 Warriors of Virtue,1997,Action,6448817 A Good Year,2006,Comedy,7458269 Radio Flyer,1992,Drama,4651977 "Blood In, Blood Out",1993,Drama,4496583 Smilla's Sense of Snow,1997,Thriller,2221994 Femme Fatale,2002,Thriller,6592103 Ride with the Devil,1999,War,630779 The Maze Runner,2014,Thriller,102413606 Unfinished Business,2015,Comedy,10214013 The Age of Innocence,1993,Romance,32000000 The Fountain,2006,Drama,10139254 Chill Factor,1999,Comedy,11227940 Stolen,2012,Thriller,183125 Ponyo,2008,Fantasy,15081783 The Longest Ride,2015,Romance,37432299 The Astronaut's 
Wife,1999,Sci-Fi,10654581 I Dreamed of Africa,2000,Romance,6543194 Playing for Keeps,2012,Romance,13101142 Mandela: Long Walk to Freedom,2013,Biography,8324748 A Few Good Men,1992,Drama,141340178 Exit Wounds,2001,Drama,51758599 Big Momma's House,2000,Comedy,117559438 The Darkest Hour,2011,Thriller,21426805 Step Up Revolution,2012,Romance,35057332 Snakes on a Plane,2006,Action,34014398 The Watcher,2000,Horror,28927720 The Punisher,2004,Crime,33682273 Goal! The Dream Begins,2005,Romance,4280577 Safe,2012,Crime,17120019 Pushing Tin,1999,Comedy,8406264 Star Wars: Episode VI - Return of the Jedi,1983,Sci-Fi,309125409 Doomsday,2008,Action,10955425 The Reader,2008,Romance,34180954 Elf,2003,Family,173381405 Phenomenon,1996,Fantasy,104632573 Snow Dogs,2002,Comedy,81150692 Scrooged,1988,Drama,60328558 Nacho Libre,2006,Comedy,80197993 Bridesmaids,2011,Romance,169076745 This Is the End,2013,Fantasy,101470202 Stigmata,1999,Horror,50041732 Men of Honor,2000,Biography,48814909 Takers,2010,Crime,57744720 The Big Wedding,2013,Comedy,21784432 "Big Mommas: Like Father, Like Son",2011,Comedy,37911876 Source Code,2011,Mystery,54696902 Alive,1993,Adventure,36733909 The Number 23,2007,Thriller,35063732 The Young and Prodigious T.S. Spivet,2013,Family,99462 Dreamer: Inspired by a True Story,2005,Drama,32701088 A History of Violence,2005,Crime,31493782 Transporter 2,2005,Crime,43095600 The Quick and the Dead,1995,Thriller,18636537 Laws of Attraction,2004,Comedy,17848322 Bringing Out the Dead,1999,Drama,16640210 Repo Men,2010,Thriller,13763130 Dragon Wars: D-War,2007,Horror,10956379 Bogus,1996,Fantasy,4357000 The Incredible Burt Wonderstone,2013,Comedy,22525921 Cats Don't Dance,1997,Fantasy,3562749 Cradle Will Rock,1999,Drama,2899970 The Good German,2006,Thriller,1304837 Apocalypse Now,1979,War,78800000 Going the Distance,2010,Comedy,17797316 Mr. 
Holland's Opus,1995,Drama,82528097 Criminal,2016,Thriller,14268533 Out of Africa,1985,Romance,87100000 Flight,2012,Thriller,93749203 Moonraker,1979,Sci-Fi,62700000 The Grand Budapest Hotel,2014,Crime,59073773 Hearts in Atlantis,2001,Mystery,24185781 Arachnophobia,1990,Fantasy,53133888 Frequency,2000,Sci-Fi,44983704 Ghostbusters,2016,Fantasy,118099659 Vacation,2015,Comedy,58879132 Get Shorty,1995,Crime,72077000 Chicago,2002,Musical,170684505 Big Daddy,1999,Comedy,163479795 American Pie 2,2001,Comedy,145096820 Toy Story,1995,Comedy,191796233 Speed,1994,Thriller,121248145 The Vow,2012,Drama,125014030 Extraordinary Measures,2010,Drama,11854694 Remember the Titans,2000,Biography,115648585 The Hunt for Red October,1990,Action,122012643 Lee Daniels' The Butler,2013,Biography,116631310 Dodgeball: A True Underdog Story,2004,Comedy,114324072 The Addams Family,1991,Fantasy,113502246 Ace Ventura: When Nature Calls,1995,Comedy,108360000 The Princess Diaries,2001,Comedy,108244774 The First Wives Club,1996,Comedy,105444419 Se7en,1995,Crime,100125340 District 9,2009,Sci-Fi,115646235 The SpongeBob SquarePants Movie,2004,Animation,85416609 Mystic River,2003,Mystery,90135191 Million Dollar Baby,2004,Sport,100422786 Analyze This,1999,Crime,106694016 The Notebook,2004,Drama,64286 27 Dresses,2008,Romance,76806312 Hannah Montana: The Movie,2009,Romance,79566871 Rugrats in Paris: The Movie,2000,Comedy,76501438 The Prince of Tides,1991,Romance,74787599 Legends of the Fall,1994,War,66528842 Up in the Air,2009,Romance,83813460 About Schmidt,2002,Comedy,65010106 Warm Bodies,2013,Romance,66359959 Looper,2012,Crime,66468315 Down to Earth,2001,Comedy,64172251 Babe,1995,Drama,66600000 Hope Springs,2012,Romance,63536011 Forgetting Sarah Marshall,2008,Romance,62877175 Four Brothers,2005,Thriller,74484168 Baby Mama,2008,Comedy,60269340 Hope Floats,1998,Romance,60033780 Bride Wars,2009,Comedy,58715510 Without a Paddle,2004,Adventure,58156435 13 Going on 30,2004,Romance,56044241 Midnight in 
Paris,2011,Comedy,56816662 The Nut Job,2014,Adventure,64238770 Blow,2001,Drama,52937130 Message in a Bottle,1999,Drama,52799004 Star Trek V: The Final Frontier,1989,Thriller,55210049 Like Mike,2002,Sport,51432423 Naked Gun 33 1/3: The Final Insult,1994,Crime,51109400 A View to a Kill,1985,Adventure,50300000 The Curse of the Were-Rabbit,2005,Mystery,56068547 P.S. I Love You,2007,Drama,53680848 Atonement,2007,Mystery,50921738 Letters to Juliet,2010,Romance,53021560 Black Rain,1989,Action,45645204 Corpse Bride,2005,Romance,53337608 Sicario,2015,Mystery,46875468 Southpaw,2015,Drama,52418902 Drag Me to Hell,2009,Thriller,42057340 The Age of Adaline,2015,Drama,42478175 Secondhand Lions,2003,Drama,41407470 Step Up 3D,2010,Music,42385520 Blue Crush,2002,Romance,40118420 Stranger Than Fiction,2006,Fantasy,40137776 30 Days of Night,2007,Horror,39568996 The Cabin in the Woods,2012,Fantasy,42043633 Meet the Spartans,2008,Comedy,38232624 Midnight Run,1988,Action,38413606 The Running Man,1987,Action,38122105 Little Shop of Horrors,1986,Sci-Fi,38747385 Hanna,2011,Thriller,40247512 Mortal Kombat: Annihilation,1997,Fantasy,35927406 Larry Crowne,2011,Comedy,35565975 Carrie,2013,Horror,35266619 Take the Lead,2006,Music,34703228 Gridiron Gang,2006,Sport,38432823 What's the Worst That Could Happen?,2001,Crime,32095318 9,2009,Mystery,31743332 Side Effects,2013,Crime,32154410 Winnie the Pooh,2011,Animation,26687172 Dumb and Dumberer: When Harry Met Lloyd,2003,Comedy,26096584 Bulworth,1998,Drama,26525834 Get on Up,2014,Biography,30513940 One True Thing,1998,Drama,23209440 Virtuosity,1995,Thriller,24048000 My Super Ex-Girlfriend,2006,Sci-Fi,22526144 Deliver Us from Evil,2014,Thriller,30523568 Sanctum,2011,Adventure,23070045 Little Black Book,2004,Comedy,20422207 The Five-Year Engagement,2012,Romance,28644770 Mr 3000,2004,Drama,21800302 The Next Three Days,2010,Drama,21129348 Ultraviolet,2006,Thriller,18500966 Assault on Precinct 13,2005,Action,19976073 The Replacement 
Killers,1998,Thriller,18967571 Fled,1996,Romance,17100000 Eight Legged Freaks,2002,Horror,17266505 Love & Other Drugs,2010,Comedy,32357532 88 Minutes,2007,Thriller,16930884 North Country,2005,Drama,18324242 The Whole Ten Yards,2004,Thriller,16323969 Splice,2009,Sci-Fi,16999046 Howard the Duck,1986,Romance,16295774 Pride and Glory,2008,Crime,15709385 The Cave,2005,Thriller,14888028 Alex & Emma,2003,Comedy,14208384 Wicker Park,2004,Thriller,12831121 Fright Night,2011,Horror,18298649 The New World,2005,History,12712093 Wing Commander,1999,Sci-Fi,11576087 In Dreams,1999,Thriller,11900000 Dragonball: Evolution,2009,Thriller,9353573 The Last Stand,2013,Crime,12026670 Godsend,2004,Drama,14334645 Chasing Liberty,2004,Romance,12189514 Hoodwinked Too! Hood vs. Evil,2011,Animation,10134754 An Unfinished Life,2005,Drama,8535575 The Imaginarium of Doctor Parnassus,2009,Fantasy,7689458 Runner Runner,2013,Crime,19316646 Antitrust,2001,Thriller,10965209 Glory,1989,War,26830000 Once Upon a Time in America,1984,Crime,5300000 Dead Man Down,2013,Thriller,10880926 The Merchant of Venice,2004,Drama,3752725 The Good Thief,2002,Crime,3517797 Miss Potter,2006,Biography,2975649 The Promise,2005,Fantasy,668171 DOA: Dead or Alive,2006,Adventure,480314 The Assassination of Jesse James by the Coward Robert Ford,2007,History,3904982 1911,2011,History,127437 Machine Gun Preacher,2011,Biography,537580 Pitch Perfect 2,2015,Comedy,183436380 Walk the Line,2005,Biography,119518352 Keeping the Faith,2000,Drama,37036404 The Borrowers,1997,Family,22359293 Frost/Nixon,2008,Drama,18593156 Serving Sara,2002,Comedy,16930185 The Boss,2016,Comedy,63034755 Cry Freedom,1987,Biography,5899797 Mumford,1999,Drama,4554569 Seed of Chucky,2004,Comedy,17016190 The Jacket,2005,Drama,6301131 Aladdin,1992,Animation,217350219 Straight Outta Compton,2015,Crime,161029270 Indiana Jones and the Temple of Doom,1984,Adventure,179870271 The Rugrats Movie,1998,Drama,100491683 Along Came a Spider,2001,Drama,74058698 Once Upon a 
Time in Mexico,2003,Thriller,55845943 Die Hard,1988,Action,81350242 Role Models,2008,Comedy,67266300 The Big Short,2015,Biography,70235322 Taking Woodstock,2009,Comedy,7443007 Miracle,2004,Sport,64371181 Dawn of the Dead,2004,Thriller,58885635 The Wedding Planner,2001,Romance,60400856 The Royal Tenenbaums,2001,Comedy,52353636 Identity,2003,Thriller,51475962 Last Vegas,2013,Romance,63910583 For Your Eyes Only,1981,Thriller,62300000 Serendipity,2001,Comedy,49968653 Timecop,1994,Thriller,44450000 Zoolander,2001,Comedy,45162741 Safe Haven,2013,Thriller,71346930 Hocus Pocus,1993,Family,39514713 No Reservations,2007,Romance,43097652 Kick-Ass,2010,Comedy,48043505 30 Minutes or Less,2011,Action,37053924 Dracula 2000,2000,Action,33000377 "Alexander and the Terrible, Horrible, No Good, Very Bad Day",2014,Family,66950483 Pride & Prejudice,2005,Romance,38372662 Blade Runner,1982,Thriller,27000000 Rob Roy,1995,Biography,31600000 3 Days to Kill,2014,Drama,30688364 We Own the Night,2007,Thriller,28563179 Lost Souls,2000,Drama,16779636 Just My Luck,2006,Romance,17324744 "Mystery, Alaska",1999,Comedy,8888143 The Spy Next Door,2010,Action,24268828 A Simple Wish,1997,Fantasy,8119205 Ghosts of Mars,2001,Action,8434601 Our Brand Is Crisis,2015,Comedy,6998324 Pride and Prejudice and Zombies,2016,Romance,10907291 Kundun,1997,Drama,5532301 How to Lose Friends & Alienate People,2008,Drama,2775593 Kick-Ass 2,2013,Comedy,28751715 Brick Mansions,2014,Action,20285518 Octopussy,1983,Adventure,67900000 Knocked Up,2007,Comedy,148734225 My Sister's Keeper,2009,Drama,49185998 "Welcome Home, Roscoe Jenkins",2008,Comedy,42168445 A Passage to India,1984,History,26400000 Notes on a Scandal,2006,Crime,17508670 Rendition,2007,Drama,9664316 Star Trek VI: The Undiscovered Country,1991,Action,74888996 Divine Secrets of the Ya-Ya Sisterhood,2002,Drama,69586544 The Jungle Book,2016,Drama,362645141 Kiss the Girls,1997,Drama,60491560 The Blues Brothers,1980,Crime,54200000 Joyful Noise,2012,Music,30920167 About 
a Boy,2002,Comedy,40566655 Lake Placid,1999,Action,31768374 Lucky Number Slevin,2006,Mystery,22494487 The Right Stuff,1983,Drama,21500000 Anonymous,2011,Drama,4463292 Dark City,1998,Drama,14337579 The Duchess,2008,Biography,13823741 The Newton Boys,1998,Western,10297897 Case 39,2009,Mystery,13248477 Suspect Zero,2004,Mystery,8712564 Martian Child,2007,Family,7486906 Spy Kids: All the Time in the World in 4D,2011,Comedy,38536376 Money Monster,2016,Thriller,41008532 Formula 51,2001,Thriller,5204007 Flawless,1999,Crime,4485485 Mindhunters,2004,Crime,4476235 What Just Happened,2008,Drama,1089365 The Statement,2003,Thriller,763044 Paul Blart: Mall Cop,2009,Action,20819129 Freaky Friday,2003,Romance,110222438 The 40-Year-Old Virgin,2005,Comedy,109243478 Shakespeare in Love,1998,Drama,100241322 A Walk Among the Tombstones,2014,Mystery,25977365 Kindergarten Cop,1990,Action,91457688 Pineapple Express,2008,Crime,87341380 Ever After: A Cinderella Story,1998,Comedy,65703412 Open Range,2003,Western,58328680 Flatliners,1990,Sci-Fi,61490000 A Bridge Too Far,1977,War,50800000 Red Eye,2005,Mystery,57859105 Final Destination 2,2003,Horror,46455802 "O Brother, Where Art Thou?",2000,Adventure,45506619 Legion,2010,Action,40168080 Pain & Gain,2013,Crime,49874933 In Good Company,2004,Romance,45489752 Clockstoppers,2002,Action,36985501 Silverado,1985,Action,33200000 Brothers,2009,Thriller,28501651 Agent Cody Banks 2: Destination London,2004,Family,23222861 New Year's Eve,2011,Comedy,54540525 Original Sin,2001,Romance,16252765 The Raven,2012,Thriller,16005978 Welcome to Mooseport,2004,Romance,14469428 Highlander: The Final Dimension,1994,Fantasy,13829734 Blood and Wine,1996,Drama,1075288 The Curse of the Jade Scorpion,2001,Comedy,7496522 Flipper,1996,Adventure,20047715 Self/less,2015,Mystery,12276810 The Constant Gardener,2005,Romance,33565375 The Passion of the Christ,2004,Drama,499263 Mrs. 
Doubtfire,1993,Comedy,219200000 Rain Man,1988,Drama,172825435 Gran Torino,2008,Drama,148085755 W.,2008,Biography,25517500 Taken,2008,Action,145000989 The Best of Me,2014,Romance,26761283 The Bodyguard,1992,Action,121945720 Schindler's List,1993,Biography,96067179 The Help,2011,Drama,169705587 The Fifth Estate,2013,Biography,3254172 Scooby-Doo 2: Monsters Unleashed,2004,Comedy,84185387 Freddy vs. Jason,2003,Thriller,82163317 Jimmy Neutron: Boy Genius,2001,Sci-Fi,80920948 Cloverfield,2008,Adventure,80034302 Teenage Mutant Ninja Turtles II: The Secret of the Ooze,1991,Adventure,78656813 The Untouchables,1987,Thriller,76270454 No Country for Old Men,2007,Drama,74273505 Ride Along,2014,Action,134141530 Bridget Jones's Diary,2001,Comedy,71500556 Chocolat,2000,Romance,71309760 "Legally Blonde 2: Red, White & Blonde",2003,Comedy,89808372 Parental Guidance,2012,Comedy,77264926 No Strings Attached,2011,Comedy,70625986 Tombstone,1993,Romance,56505065 Romeo Must Die,2000,Action,55973336 Final Destination 3,2006,Horror,54098051 The Lucky One,2012,Drama,60443237 Bridge to Terabithia,2007,Family,82234139 Finding Neverland,2004,Family,51676606 A Madea Christmas,2013,Comedy,52528330 The Grey,2011,Thriller,51533608 Hide and Seek,2005,Horror,51097664 Anchorman: The Legend of Ron Burgundy,2004,Comedy,84136909 Goodfellas,1990,Drama,46836394 Agent Cody Banks,2003,Adventure,47285499 Nanny McPhee,2005,Fantasy,47124400 Scarface,1983,Crime,44700000 Nothing to Lose,1997,Adventure,44455658 The Last Emperor,1987,Biography,43984230 Contraband,2012,Drama,66489425 Money Talks,1997,Comedy,41067398 There Will Be Blood,2007,Drama,40218903 The Wild Thornberrys Movie,2002,Animation,39880476 Rugrats Go Wild,2003,Musical,39399750 Undercover Brother,2002,Action,38230435 The Sisterhood of the Traveling Pants,2005,Romance,39008741 Kiss of the Dragon,2001,Crime,36833473 The House Bunny,2008,Romance,48237389 Million Dollar Arm,2014,Sport,36447959 The Giver,2014,Romance,45089048 What a Girl 
Wants,2003,Drama,35990505 Jeepers Creepers II,2003,Horror,35143332 Good Luck Chuck,2007,Romance,35000629 Cradle 2 the Grave,2003,Crime,34604054 The Hours,2002,Drama,41597830 She's the Man,2006,Romance,33687630 Mr. Bean's Holiday,2007,Family,32553210 Anacondas: The Hunt for the Blood Orchid,2004,Horror,31526393 Blood Ties,2013,Drama,41229 August Rush,2007,Drama,31655091 Elizabeth,1998,History,30012990 Bride of Chucky,1998,Horror,32368960 Tora! Tora! Tora!,1970,Action,14500000 Spice World,1997,Music,29247405 Dance Flick,2009,Music,25615792 The Shawshank Redemption,1994,Crime,28341469 Crocodile Dundee in Los Angeles,2001,Adventure,25590119 Kingpin,1996,Comedy,24944213 The Gambler,2014,Drama,33631221 August: Osage County,2013,Drama,37738400 A Lot Like Love,2005,Romance,21835784 Eddie the Eagle,2016,Drama,15785632 He Got Game,1998,Sport,21554585 Don Juan DeMarco,1994,Romance,22200000 The Losers,2010,Mystery,23527955 Don't Be Afraid of the Dark,2010,Horror,24042490 War,2007,Thriller,22466994 Punch-Drunk Love,2002,Comedy,17791031 EuroTrip,2004,Comedy,17718223 Half Past Dead,2002,Crime,15361537 Unaccompanied Minors,2006,Adventure,16647384 "Bright Lights, Big City",1988,Drama,16118077 The Adventures of Pinocchio,1996,Adventure,15091542 The Box,2009,Thriller,15045676 The Ruins,2008,Horror,17427926 The Next Best Thing,2000,Comedy,14983572 My Soul to Take,2010,Mystery,14637490 The Girl Next Door,2004,Comedy,14589444 Maximum Risk,1996,Romance,14095303 Stealing Harvard,2002,Crime,13973532 Legend,2015,Crime,1865774 Shark Night 3D,2011,Thriller,18860403 Angela's Ashes,1999,Drama,13038660 Draft Day,2014,Sport,28831145 The Conspirator,2010,Crime,11538204 Lords of Dogtown,2005,Sport,11008432 The 33,2015,Drama,12188642 Big Trouble in Little China,1986,Adventure,11100000 Warrior,2011,Sport,13651662 Michael Collins,1996,Biography,11030963 Gettysburg,1993,Drama,10769960 Stop-Loss,2008,War,10911750 Abandon,2002,Mystery,10719367 Brokedown Palace,1999,Mystery,10114315 The 
Possession,2012,Horror,49122319 Mrs. Winterbourne,1996,Romance,10070000 Straw Dogs,2011,Action,10324441 The Hoax,2006,Drama,7156933 Stone Cold,1991,Thriller,9286314 The Road,2009,Adventure,56692 Underclassman,2005,Thriller,5654777 Say It Isn't So,2001,Comedy,5516708 The World's Fastest Indian,2005,Sport,5128124 Snakes on a Plane,2006,Action,34014398 Tank Girl,1995,Action,4064333 King's Ransom,2005,Crime,4006906 Blindness,2008,Thriller,3073392 BloodRayne,2005,Action,1550000 Where the Truth Lies,2005,Mystery,871527 Without Limits,1998,Sport,777423 Me and Orson Welles,2008,Drama,1186957 The Best Offer,2013,Crime,85433 Bad Lieutenant: Port of Call New Orleans,2009,Crime,1697956 Little White Lies,2010,Comedy,183662 Love Ranch,2010,Sport,134904 The Counselor,2013,Drama,16969390 Dangerous Liaisons,1988,Drama,34700000 On the Road,2012,Adventure,717753 Star Trek IV: The Voyage Home,1986,Sci-Fi,109713132 Rocky Balboa,2006,Drama,70269171 Point Break,2015,Sport,28772222 Scream 2,1997,Horror,101334374 Jane Got a Gun,2016,Drama,1512815 Think Like a Man Too,2014,Comedy,65182182 The Whole Nine Yards,2000,Comedy,57262492 Footloose,1984,Music,80000000 Old School,2003,Comedy,74608545 The Fisher King,1991,Comedy,41895491 I Still Know What You Did Last Summer,1998,Mystery,39989008 Return to Me,2000,Romance,32662299 Zack and Miri Make a Porno,2008,Romance,31452765 Nurse Betty,2000,Comedy,25167270 The Men Who Stare at Goats,2009,War,32416109 Double Take,2001,Crime,20218 "Girl, Interrupted",1999,Biography,28871190 Win a Date with Tad Hamilton!,2004,Comedy,16964743 Muppets from Space,1999,Comedy,16290976 The Wiz,1978,Music,13000000 Ready to Rumble,2000,Sport,12372410 Play It to the Bone,1999,Drama,8427204 I Don't Know How She Does It,2011,Comedy,9639242 Piranha 3D,2010,Horror,25003072 Beyond the Sea,2004,Drama,6144806 The Princess and the Cobbler,1993,Animation,669276 The Bridge of San Luis Rey,2004,Drama,42880 Faster,2010,Crime,23225911 Howl's Moving Castle,2004,Adventure,4710455 
Zombieland,2009,Sci-Fi,75590286 King Kong,2005,Drama,218051260 The Waterboy,1998,Comedy,161487252 Star Wars: Episode V - The Empire Strikes Back,1980,Fantasy,290158751 Bad Boys,1995,Crime,65807024 The Naked Gun 2½: The Smell of Fear,1991,Comedy,86930411 Final Destination,2000,Thriller,53302314 The Ides of March,2011,Drama,40962534 Pitch Black,2000,Horror,39235088 Someone Like You...,2001,Romance,27338033 Her,2013,Drama,25556065 Eddie the Eagle,2016,Sport,15785632 Joy Ride,2001,Thriller,21973182 The Adventurer: The Curse of the Midas Box,2013,Fantasy,4756 Anywhere But Here,1999,Drama,18653615 Chasing Liberty,2004,Romance,12189514 The Crew,2000,Crime,13019253 Haywire,2011,Thriller,18934858 Jaws: The Revenge,1987,Horror,20763013 Marvin's Room,1996,Drama,12782508 The Longshots,2008,Family,11508423 The End of the Affair,1999,Drama,10660147 Harley Davidson and the Marlboro Man,1991,Western,7434726 Coco Before Chanel,2009,Biography,6109075 Chéri,2009,Drama,2708188 Vanity Fair,2004,Drama,16123851 1408,2007,Horror,71975611 Spaceballs,1987,Comedy,38119483 The Water Diviner,2014,Drama,4190530 Ghost,1990,Fantasy,217631306 There's Something About Mary,1998,Romance,176483808 The Santa Clause,1994,Fantasy,144833357 The Rookie,2002,Sport,75597042 The Game Plan,2007,Sport,90636983 The Bridges of Madison County,1995,Drama,70960517 The Animal,2001,Comedy,55762229 The Hundred-Foot Journey,2014,Comedy,54235441 The Net,1995,Mystery,50728000 I Am Sam,2001,Drama,40270895 Son of God,2014,History,59696176 Underworld,2003,Fantasy,51483949 Derailed,2005,Drama,36020063 The Informant!,2009,Drama,33313582 Shadowlands,1993,Drama,25842000 Deuce Bigalow: European Gigolo,2005,Comedy,22264487 Delivery Man,2013,Drama,30659817 Victor Frankenstein,2015,Drama,5773519 Saving Silverman,2001,Comedy,19351569 Diary of a Wimpy Kid: Dog Days,2012,Comedy,49002815 Summer of Sam,1999,Thriller,19283782 Jay and Silent Bob Strike Back,2001,Comedy,30059386 The Island,2005,Sci-Fi,35799026 The Glass 
House,2001,Thriller,17951431 "Hail, Caesar!",2016,Comedy,29997095 Josie and the Pussycats,2001,Comedy,14252830 Homefront,2013,Action,19783777 The Little Vampire,2000,Adventure,13555988 I Heart Huckabees,2004,Comedy,12784713 RoboCop 3,1993,Crime,10696210 Megiddo: The Omega Code 2,2001,Action,5974653 Darling Lili,1970,Drama,5000000 Dudley Do-Right,1999,Romance,9694105 The Transporter Refueled,2015,Thriller,16027866 Black Book,2006,War,4398392 Joyeux Noel,2005,Music,1050445 Hit and Run,2012,Action,13746550 Mad Money,2008,Thriller,20668843 Before I Go to Sleep,2014,Mystery,2963012 Stone,2010,Thriller,1796024 Molière,2007,Comedy,634277 Out of the Furnace,2013,Crime,11326836 Michael Clayton,2007,Thriller,49024969 My Fellow Americans,1996,Comedy,22294341 Arlington Road,1999,Crime,24362501 To Rome with Love,2012,Comedy,16684352 Firefox,1982,Action,46700000 South Park: Bigger Longer & Uncut,1999,Fantasy,52008288 Death at a Funeral,2007,Comedy,8579684 Teenage Mutant Ninja Turtles III,1993,Fantasy,42660000 Hardball,2001,Sport,40219708 Silver Linings Playbook,2012,Romance,132088910 Freedom Writers,2007,Crime,36581633 The Transporter,2002,Action,25296447 Never Back Down,2008,Sport,24848292 The Rage: Carrie 2,1999,Thriller,17757087 Away We Go,2009,Drama,9430988 Swing Vote,2008,Drama,16284360 Moonlight Mile,2002,Romance,6830957 Tinker Tailor Soldier Spy,2011,Drama,24104113 Molly,1999,Drama,15593 The Beaver,2011,Drama,958319 The Best Little Whorehouse in Texas,1982,Comedy,69700000 eXistenZ,1999,Horror,2840417 Raiders of the Lost Ark,1981,Action,242374454 Home Alone 2: Lost in New York,1992,Comedy,173585516 Close Encounters of the Third Kind,1977,Sci-Fi,128300000 Pulse,2006,Thriller,20259297 Beverly Hills Cop II,1987,Comedy,153665036 Bringing Down the House,2003,Comedy,132541238 The Silence of the Lambs,1991,Crime,130727000 Wayne's World,1992,Comedy,121697350 Jackass 3D,2010,Comedy,117224271 Jaws 2,1978,Thriller,102922376 Beverly Hills Chihuahua,2008,Comedy,94497271 The 
Conjuring,2013,Thriller,137387272 Are We There Yet?,2005,Family,82301521 Tammy,2014,Comedy,84518155 Disturbia,2007,Drama,80050171 School of Rock,2003,Music,81257845 Mortal Kombat,1995,Thriller,70360285 Wicker Park,2004,Drama,12831121 White Chicks,2004,Crime,69148997 The Descendants,2011,Drama,82624961 Holes,2003,Family,67325559 The Last Song,2010,Romance,62933793 12 Years a Slave,2013,Biography,56667870 Drumline,2002,Music,56398162 Why Did I Get Married Too?,2010,Romance,60072596 Edward Scissorhands,1990,Romance,56362352 Me Before You,2016,Romance,56154094 Madea's Witness Protection,2012,Crime,65623128 Date Movie,2006,Romance,48546578 Return to Never Land,2002,Adventure,48423368 Selma,2014,Drama,52066000 The Jungle Book 2,2003,Animation,47887943 Boogeyman,2005,Thriller,46363118 Premonition,2007,Drama,47852604 The Tigger Movie,2000,Drama,45542421 Max,2015,Family,42652003 Epic Movie,2007,Comedy,39737645 Conan the Barbarian,1982,Adventure,37567440 Spotlight,2015,History,44988180 Lakeview Terrace,2008,Crime,39263506 The Grudge 2,2006,Horror,39143839 How Stella Got Her Groove Back,1998,Drama,37672350 Bill & Ted's Bogus Journey,1991,Music,38037513 Man of the Year,2006,Comedy,37442180 The American,2010,Crime,35596227 Selena,1997,Music,35422828 Vampires Suck,2010,Comedy,36658108 Babel,2006,Drama,34300771 This Is Where I Leave You,2014,Comedy,34290142 Doubt,2008,Drama,33422556 Team America: World Police,2004,Comedy,32774834 Texas Chainsaw 3D,2013,Thriller,34334256 Copycat,1995,Drama,32051917 Scary Movie 5,2013,Comedy,32014289 Milk,2008,Drama,31838002 Risen,2016,Mystery,36874745 Ghost Ship,2002,Horror,30079316 A Very Harold & Kumar 3D Christmas,2011,Comedy,35033759 Wild Things,1998,Mystery,29753944 The Debt,2010,Drama,31146570 High Fidelity,2000,Drama,27277055 One Missed Call,2008,Mystery,26876529 Eye for an Eye,1996,Crime,53146000 The Bank Job,2008,Romance,30028592 Eternal Sunshine of the Spotless Mind,2004,Drama,34126138 You Again,2010,Family,25677801 Street 
Kings,2008,Drama,26415649 The World's End,2013,Comedy,26003149 Nancy Drew,2007,Comedy,25584685 Daybreakers,2009,Thriller,29975979 She's Out of My League,2010,Comedy,31584722 Monte Carlo,2011,Family,23179303 Stay Alive,2006,Thriller,23078294 Quigley Down Under,1990,Drama,21413105 Alpha and Omega,2010,Comedy,25077977 The Covenant,2006,Fantasy,23292105 Shorts,2009,Family,20916309 To Die For,1995,Drama,21200000 Vampires,1998,Action,20241395 Psycho,1960,Mystery,32000000 My Best Friend's Girl,2008,Romance,19151864 Endless Love,2014,Romance,23393765 Georgia Rule,2007,Comedy,18882880 Under the Rainbow,1981,Comedy,8500000 Simon Birch,1998,Drama,18252684 Reign Over Me,2007,Drama,19661987 Into the Wild,2007,Biography,18352454 School for Scoundrels,2006,Comedy,17803796 Silent Hill: Revelation 3D,2012,Horror,17529157 From Dusk Till Dawn,1996,Crime,25753840 Pooh's Heffalump Movie,2005,Animation,18081626 Home for the Holidays,1995,Comedy,17518220 Kung Fu Hustle,2004,Action,17104669 The Country Bears,2002,Family,16988996 The Kite Runner,2007,Drama,15797907 21 Grams,2003,Drama,16248701 Paparazzi,2004,Crime,15712072 Twilight,2008,Romance,191449475 A Guy Thing,2003,Romance,15408822 Loser,2000,Comedy,15464026 The Greatest Story Ever Told,1965,History,8000000 Disaster Movie,2008,Comedy,14174654 Armored,2009,Thriller,15988876 The Man Who Knew Too Little,1997,Thriller,13801755 What's Your Number?,2011,Romance,13987482 Lockout,2012,Thriller,14291570 Envy,2004,Comedy,12181484 Crank: High Voltage,2009,Crime,13630226 Bullets Over Broadway,1994,Crime,13383737 One Night with the King,2006,Drama,13391174 The Quiet American,2002,War,12987647 The Weather Man,2005,Drama,12469811 Undisputed,2002,Action,12398628 Ghost Town,2008,Fantasy,13214030 12 Rounds,2009,Action,12232937 Let Me In,2010,Horror,12134420 3 Ninjas Kick Back,1994,Action,11784000 Be Kind Rewind,2008,Comedy,11169531 Mrs Henderson Presents,2005,War,11034436 Triple 9,2016,Crime,12626905 Deconstructing Harry,1997,Comedy,10569071 Three to 
Tango,1999,Romance,10544143 Burnt,2015,Comedy,13650738 We're No Angels,1989,Comedy,10555348 Everyone Says I Love You,1996,Musical,9714482 Death at a Funeral,2007,Comedy,8579684 Death Sentence,2007,Crime,9525276 Everybody's Fine,2009,Adventure,8855646 Superbabies: Baby Geniuses 2,2004,Family,9109322 The Man,2005,Action,8326035 Code Name: The Cleaner,2007,Crime,8104069 Connie and Carla,2004,Comedy,8054280 Inherent Vice,2014,Romance,8093318 Doogal,2006,Adventure,7382993 Battle of the Year,2013,Music,8888355 An American Carol,2008,Comedy,7001720 Machete Kills,2013,Action,7268659 Willard,2003,Horror,6852144 Strange Wilderness,2008,Adventure,6563357 Topsy-Turvy,1999,Drama,6201757 A Dangerous Method,2011,Thriller,5702083 A Scanner Darkly,2006,Mystery,5480996 Chasing Mavericks,2012,Sport,6002756 Alone in the Dark,2005,Sci-Fi,5132655 Bandslam,2009,Family,5205343 Birth,2004,Thriller,5005883 A Most Violent Year,2014,Crime,5749134 Flash of Genius,2008,Drama,4234040 I'm Not There.,2007,Drama,4001121 The Cold Light of Day,2012,Thriller,3749061 The Brothers Bloom,2008,Drama,3519627 "Synecdoche, New York",2008,Drama,3081925 Princess Mononoke,1997,Adventure,2298191 Bon voyage,2003,Mystery,2353728 Can't Stop the Music,1980,Musical,2000000 The Proposition,2005,Western,1900725 Courage,2015,Biography,2246000 Marci X,2003,Comedy,1646664 Equilibrium,2002,Thriller,1190018 The Children of Huang Shi,2008,War,1027749 The Yards,2000,Crime,882710 By the Sea,2015,Drama,531009 Steamboy,2004,Family,410388 The Game of Their Lives,2005,Drama,375474 Rapa Nui,1994,History,305070 Dylan Dog: Dead of Night,2010,Crime,1183354 People I Know,2002,Drama,121972 The Tempest,2010,Fantasy,263365 The Painted Veil,2006,Romance,8047690 The Baader Meinhof Complex,2008,Drama,476270 Dances with Wolves,1990,Adventure,184208848 Bad Teacher,2011,Comedy,100292856 Sea of Love,1989,Crime,58571513 A Cinderella Story,2004,Family,51431160 Scream,1996,Mystery,103001286 Thir13en Ghosts,2001,Horror,41867960 Back to the 
Future,1985,Sci-Fi,210609762 House on Haunted Hill,1999,Horror,40846082 I Can Do Bad All by Myself,2009,Comedy,51697449 The Switch,2010,Romance,27758465 Just Married,2003,Romance,56127162 The Devil's Double,2011,Biography,1357042 Thomas and the Magic Railroad,2000,Comedy,15911333 The Crazies,2010,Thriller,39103378 Spirited Away,2001,Family,10049886 The Bounty,1984,Adventure,8600000 The Book Thief,2013,Drama,21483154 Sex Drive,2008,Adventure,8396942 Leap Year,2010,Comedy,12561 Take Me Home Tonight,2011,Romance,6923891 The Nutcracker,1993,Fantasy,2119994 Kansas City,1996,Drama,1292527 The Amityville Horror,2005,Thriller,64255243 Adaptation.,2002,Drama,22245861 Land of the Dead,2005,Horror,20433940 Fear and Loathing in Las Vegas,1998,Comedy,10562387 The Invention of Lying,2009,Comedy,18439082 Neighbors,2014,Comedy,150056505 The Mask,1994,Action,119938730 Big,1988,Fantasy,114968774 Borat: Cultural Learnings of America for Make Benefit Glorious Nation of Kazakhstan,2006,Comedy,128505958 Legally Blonde,2001,Romance,95001351 Star Trek III: The Search for Spock,1984,Action,76400000 The Exorcism of Emily Rose,2005,Drama,75072454 Deuce Bigalow: Male Gigolo,1999,Romance,65535067 Left Behind,2014,Thriller,13998282 The Family Stone,2005,Comedy,6061759 Barbershop 2: Back in Business,2004,Drama,64955956 Bad Santa,2003,Drama,60057639 Austin Powers: International Man of Mystery,1997,Comedy,53868030 My Big Fat Greek Wedding 2,2016,Family,59573085 Diary of a Wimpy Kid: Rodrick Rules,2011,Comedy,52691009 Predator,1987,Sci-Fi,59735548 Amadeus,1984,History,51600000 Prom Night,2008,Horror,43818159 Mean Girls,2004,Comedy,86049418 Under the Tuscan Sun,2003,Romance,43601508 Gosford Park,2001,Mystery,41300105 Peggy Sue Got Married,1986,Comedy,41382841 Birdman or (The Unexpected Virtue of Ignorance),2014,Comedy,42335698 Blue Jasmine,2013,Drama,33404871 United 93,2006,History,31471430 Honey,2003,Drama,30222640 Glory,1989,History,26830000 Spy Hard,1996,Action,26906039 The 
Fog,1980,Fantasy,21378000 Soul Surfer,2011,Sport,43853424 Observe and Report,2009,Crime,23993605 Conan the Destroyer,1984,Fantasy,26400000 Raging Bull,1980,Drama,45250 Love Happens,2009,Drama,22927390 Young Sherlock Holmes,1985,Thriller,4250320 Fame,2009,Musical,22452209 127 Hours,2010,Thriller,18329466 Small Time Crooks,2000,Comedy,17071230 Center Stage,2000,Drama,17174870 Love the Coopers,2015,Comedy,26284475 Catch That Kid,2004,Comedy,16702864 Life as a House,2001,Drama,15561627 Steve Jobs,2015,Biography,17750583 "I Love You, Beth Cooper",2009,Comedy,14793904 Youth in Revolt,2009,Romance,15281286 The Legend of the Lone Ranger,1981,Western,8000000 The Tailor of Panama,2001,Thriller,13491653 Getaway,2013,Crime,10494494 The Ice Storm,1997,Drama,7837632 And So It Goes,2014,Drama,15155772 Troop Beverly Hills,1989,Comedy,8508843 Being Julia,2004,Drama,7739049 9½ Weeks,1986,Romance,6734844 Dragonslayer,1981,Adventure,6000000 The Last Station,2009,Drama,6615578 Ed Wood,1994,Biography,5887457 Labor Day,2013,Drama,13362308 Mongol: The Rise of Genghis Khan,2007,Biography,5701643 RocknRolla,2008,Crime,5694401 Megaforce,1982,Action,5333658 Hamlet,1996,Drama,4414535 Midnight Special,2016,Thriller,3707794 Anything Else,2003,Romance,3203044 The Railway Man,2013,Biography,4435083 The White Ribbon,2009,Drama,2222647 The Wraith,1986,Romance,3500000 The Salton Sea,2002,Drama,676698 One Man's Hero,1999,Western,229311 Renaissance,2006,Thriller,63260 Superbad,2007,Comedy,121463226 Step Up 2: The Streets,2008,Romance,58006147 Hoodwinked!,2005,Comedy,51053787 Hotel Rwanda,2004,Drama,23472900 Hitman,2007,Action,39687528 Black Nativity,2013,Family,7017178 City of Ghosts,2002,Crime,325491 The Others,2001,Horror,96471845 Aliens,1986,Action,85200000 My Fair Lady,1964,Romance,72000000 I Know What You Did Last Summer,1997,Mystery,72219395 Let's Be Cops,2014,Comedy,82389560 Sideways,2004,Adventure,71502303 Beerfest,2006,Comedy,19179969 Halloween,1978,Thriller,47000000 Hero,2002,Action,84961 
Good Boy!,2003,Drama,37566230 The Best Man Holiday,2013,Comedy,70492685 Smokin' Aces,2006,Action,35635046 Saw 3D: The Final Chapter,2010,Mystery,45670855 40 Days and 40 Nights,2002,Romance,37939782 TRON: Legacy,2010,Action,172051787 A Night at the Roxbury,1998,Romance,30324946 Beastly,2011,Fantasy,27854896 The Hills Have Eyes,2006,Horror,41777564 Dickie Roberts: Former Child Star,2003,Comedy,22734486 "McFarland, USA",2015,Biography,44469602 Pitch Perfect,2012,Comedy,64998368 Summer Catch,2001,Comedy,19693891 A Simple Plan,1998,Drama,16311763 They,2002,Horror,12693621 Larry the Cable Guy: Health Inspector,2006,Comedy,15655665 The Adventures of Elmo in Grouchland,1999,Comedy,11634458 Brooklyn's Finest,2009,Drama,27154426 Evil Dead,2013,Horror,54239856 My Life in Ruins,2009,Romance,8662318 American Dreamz,2006,Music,7156725 Superman IV: The Quest for Peace,1987,Sci-Fi,15681020 Running Scared,2006,Drama,6855137 Shanghai Surprise,1986,Romance,2315683 The Illusionist,2006,Mystery,39825798 Roar,1981,Thriller,2000000 Veronica Guerin,2003,Crime,1569918 Southland Tales,2006,Thriller,273420 The Apparition,2012,Horror,4930798 My Girl,1991,Romance,59847242 Fur: An Imaginary Portrait of Diane Arbus,2006,Drama,220914 The Illusionist,2006,Drama,39825798 Wall Street,1987,Crime,43848100 Sense and Sensibility,1995,Drama,42700000 Becoming Jane,2007,Drama,18663911 Sydney White,2007,Comedy,11702090 House of Sand and Fog,2003,Drama,13005485 Dead Poets Society,1989,Drama,95860116 Dumb & Dumber,1994,Comedy,127175354 When Harry Met Sally...,1989,Romance,92823600 The Verdict,1982,Drama,54000000 Road Trip,2000,Comedy,68525609 Varsity Blues,1999,Sport,52885587 The Artist,2011,Comedy,44667095 The Unborn,2009,Fantasy,42638165 Moonrise Kingdom,2012,Comedy,45507053 The Texas Chainsaw Massacre: The Beginning,2006,Horror,39511038 The Young Messiah,2016,Drama,6462576 The Master of Disguise,2002,Family,40363530 Pan's Labyrinth,2006,War,37623143 See Spot Run,2001,Action,33357476 Baby 
Boy,2001,Crime,28734552 The Roommate,2011,Horror,37300107 Joe Dirt,2001,Comedy,27087695 Double Impact,1991,Crime,30102717 Hot Fuzz,2007,Action,23618786 The Women,2008,Drama,26896744 Vicky Cristina Barcelona,2008,Drama,23213577 Boys and Girls,2000,Drama,20627372 White Oleander,2002,Drama,16346122 Jennifer's Body,2009,Comedy,16204793 Drowning Mona,2000,Mystery,15427192 Radio Days,1987,Comedy,14792779 Left Behind,2014,Fantasy,13998282 Remember Me,2010,Romance,19057024 How to Deal,2003,Drama,14108518 My Stepmother Is an Alien,1988,Sci-Fi,13854000 Philadelphia,1993,Drama,77324422 The Thirteenth Floor,1999,Thriller,15500000 Duets,2000,Music,4734235 Hollywood Ending,2002,Romance,4839383 Detroit Rock City,1999,Comedy,4193025 Highlander,1986,Action,5900000 Things We Lost in the Fire,2007,Drama,2849142 Steel,1997,Crime,1686429 The Immigrant,2013,Drama,1984743 The White Countess,2005,History,1666262 Trance,2013,Thriller,2319187 Soul Plane,2004,Comedy,13922211 Good,2008,Romance,23091 Enter the Void,2009,Fantasy,336467 Vamps,2012,Romance,2964 The Homesman,2014,Drama,2428883 Juwanna Mann,2002,Drama,13571817 Slow Burn,2005,Thriller,1181197 Wasabi,2001,Drama,81525 Slither,2006,Comedy,7774730 Beverly Hills Cop,1984,Action,234760500 Home Alone,1990,Family,285761243 3 Men and a Baby,1987,Comedy,167780960 Tootsie,1982,Comedy,177200000 Top Gun,1986,Romance,176781728 "Crouching Tiger, Hidden Dragon",2000,Action,128067808 American Beauty,1999,Drama,130058047 The King's Speech,2010,History,138795342 Twins,1988,Crime,111936400 The Yellow Handkerchief,2008,Romance,317040 The Color Purple,1985,Drama,94175854 The Imitation Game,2014,War,91121452 Private Benjamin,1980,War,69800000 Diary of a Wimpy Kid,2010,Family,64001297 Mama,2013,Horror,71588220 Halloween,1978,Thriller,47000000 National Lampoon's Vacation,1983,Comedy,61400000 Bad Grandpa,2013,Comedy,101978840 The Queen,2006,Biography,56437947 Beetlejuice,1988,Fantasy,73326666 Why Did I Get Married?,2007,Comedy,55184721 Little 
Women,1994,Family,50003300 The Woman in Black,2012,Horror,54322273 When a Stranger Calls,2006,Thriller,47860214 Big Fat Liar,2002,Adventure,47811275 Wag the Dog,1997,Drama,43022524 The Lizzie McGuire Movie,2003,Romance,42672630 Snitch,2013,Action,42919096 Krampus,2015,Fantasy,42592530 The Faculty,1998,Sci-Fi,40064955 Cop Land,1997,Thriller,44886089 Not Another Teen Movie,2001,Comedy,37882551 End of Watch,2012,Drama,40983001 Aloha,2015,Romance,20991497 The Skulls,2000,Action,35007180 The Theory of Everything,2014,Romance,35887263 Malibu's Most Wanted,2003,Crime,34308901 Where the Heart Is,2000,Drama,33771174 Lawrence of Arabia,1962,History,6000000 Halloween II,2009,Horror,33386128 Wild,2014,Biography,37877959 The Last House on the Left,2009,Crime,32721635 The Wedding Date,2005,Romance,31585300 Halloween: Resurrection,2002,Comedy,30259652 Clash of the Titans,2010,Adventure,163192114 The Princess Bride,1987,Adventure,30857814 The Great Debaters,2007,Drama,30226144 Drive,2011,Crime,35054909 Confessions of a Teenage Drama Queen,2004,Comedy,29302097 The Object of My Affection,1998,Drama,29106737 28 Weeks Later,2007,Horror,28637507 When the Game Stands Tall,2014,Family,30127963 Because of Winn-Dixie,2005,Comedy,32645546 Love & Basketball,2000,Drama,27441122 Grosse Pointe Blank,1997,Crime,28014536 All About Steve,2009,Comedy,33860010 Book of Shadows: Blair Witch 2,2000,Mystery,26421314 The Craft,1996,Horror,24881000 Match Point,2005,Thriller,23089926 Ramona and Beezus,2010,Family,26161406 The Remains of the Day,1993,Drama,22954968 Boogie Nights,1997,Drama,26384919 Nowhere to Run,1993,Drama,22189039 Flicka,2006,Family,20998709 The Hills Have Eyes II,2007,Horror,20801344 Urban Legends: Final Cut,2000,Thriller,21468807 Tuck Everlasting,2002,Fantasy,19158074 The Marine,2006,Thriller,18843314 Keanu,2016,Comedy,20566327 Country Strong,2010,Music,20218921 Disturbing Behavior,1998,Sci-Fi,17411331 The Place Beyond the Pines,2012,Crime,21383298 The November 
Man,2014,Thriller,24984868 Eye of the Beholder,1999,Mystery,16459004 The Hurt Locker,2008,Drama,15700000 Firestarter,1984,Sci-Fi,15100000 Killing Them Softly,2012,Crime,14938570 A Most Wanted Man,2014,Thriller,17237244 Freddy Got Fingered,2001,Comedy,14249005 The Pirates Who Don't Do Anything: A VeggieTales Movie,2008,Animation,12701880 Highlander: Endgame,2000,Sci-Fi,12801190 Idlewild,2006,Romance,12549485 One Day,2011,Drama,13766014 Whip It,2009,Sport,13034417 Confidence,2003,Crime,12212417 The Muse,1999,Comedy,11614236 De-Lovely,2004,Drama,13337299 New York Stories,1989,Drama,10763469 Barney's Great Adventure,1998,Family,11144518 The Man with the Iron Fists,2012,Action,15608545 Home Fries,1998,Drama,10443316 Here on Earth,2000,Romance,10494147 Brazil,1985,Drama,9929000 Raise Your Voice,2004,Music,10411980 The Big Lebowski,1998,Comedy,17439163 Black Snake Moan,2006,Music,9396487 Dark Blue,2002,Crime,9059588 A Mighty Heart,2007,Thriller,9172810 Whatever It Takes,2000,Drama,8735529 Boat Trip,2002,Comedy,8586376 The Importance of Being Earnest,2002,Comedy,8378141 Hoot,2006,Family,8080116 In Bruges,2008,Crime,7757130 Peeples,2013,Romance,9123834 The Rocker,2008,Music,6409206 Post Grad,2009,Comedy,6373693 Promised Land,2012,Drama,7556708 Whatever Works,2009,Comedy,5306447 The In Crowd,2000,Thriller,5217498 Three Burials,2005,Crime,5023275 Jakob the Liar,1999,Drama,4956401 Kiss Kiss Bang Bang,2005,Comedy,4235837 Idle Hands,1999,Comedy,4002955 Mulholland Drive,2001,Drama,7219578 You Will Meet a Tall Dark Stranger,2010,Comedy,3247816 Never Let Me Go,2010,Sci-Fi,2412045 Transsiberian,2008,Drama,2203641 The Clan of the Cave Bear,1986,Drama,1953732 Crazy in Alabama,1999,Comedy,1954202 Funny Games,2007,Crime,1294640 Metropolis,1927,Drama,26435 District B13,2004,Crime,1197786 Things to Do in Denver When You're Dead,1995,Drama,529766 The Assassin,2015,Drama,613556 Buffalo Soldiers,2001,Crime,353743 Ong-bak 2,2008,Action,102055 The Midnight Meat Train,2008,Fantasy,73548 The Son 
of No One,2011,Drama,28870 All the Queen's Men,2001,Action,22723 The Good Night,2007,Drama,20380 Groundhog Day,1993,Fantasy,70906973 Magic Mike XXL,2015,Music,66009973 Romeo + Juliet,1996,Drama,46338728 Sarah's Key,2010,Drama,7691700 Unforgiven,1992,Western,101157447 Manderlay,2005,Drama,74205 Slumdog Millionaire,2008,Drama,141319195 Fatal Attraction,1987,Romance,156645693 Pretty Woman,1990,Romance,178406268 Crocodile Dundee II,1988,Action,109306210 Born on the Fourth of July,1989,Biography,70001698 Cool Runnings,1993,Adventure,68856263 My Bloody Valentine,2009,Horror,51527787 The Possession,2012,Thriller,49122319 Stomp the Yard,2007,Drama,61356221 The Spy Who Loved Me,1977,Sci-Fi,46800000 Urban Legend,1998,Thriller,38048637 Dangerous Liaisons,1988,Romance,34700000 White Fang,1991,Drama,34793160 Superstar,1999,Romance,30628981 The Iron Lady,2011,Drama,29959436 Jonah: A VeggieTales Movie,2002,Animation,25571351 Poetic Justice,1993,Drama,27515786 All About the Benjamins,2002,Crime,25482931 Vampire in Brooklyn,1995,Horror,19900000 An American Haunting,2005,Horror,16298046 My Boss's Daughter,2003,Comedy,15549702 A Perfect Getaway,2009,Adventure,15483540 Our Family Wedding,2010,Comedy,20246959 Dead Man on Campus,1998,Comedy,15062898 Tea with Mussolini,1999,Comedy,14348123 Thinner,1996,Fantasy,15171475 Crooklyn,1994,Drama,13640000 Jason X,2001,Thriller,12610731 Big Fat Liar,2002,Comedy,47811275 Bobby,2006,History,11204499 Head Over Heels,2001,Romance,10397365 Fun Size,2012,Adventure,9402410 Little Children,2006,Drama,5459824 Gossip,2000,Thriller,5108820 A Walk on the Moon,1999,Drama,4741987 Catch a Fire,2006,Biography,4291965 Soul Survivors,2001,Drama,3100650 Jefferson in Paris,1995,History,2474000 Caravans,1978,Adventure,1000000 Mr. 
Turner,2014,Drama,3958500 Amen.,2002,Biography,274299 The Lucky Ones,2008,Drama,183088 Margaret,2011,Drama,46495 Flipped,2010,Drama,1752214 Brokeback Mountain,2005,Romance,83025853 Teenage Mutant Ninja Turtles,2014,Action,190871240 Clueless,1995,Romance,56631572 Far from Heaven,2002,Drama,15854988 Hot Tub Time Machine 2,2015,Comedy,12282677 Quills,2000,Drama,7060876 Seven Psychopaths,2012,Comedy,14989761 Downfall,2004,Drama,5501940 The Sea Inside,2004,Drama,2086345 "Good Morning, Vietnam",1987,Biography,123922370 The Last Godfather,2010,Comedy,163591 Justin Bieber: Never Say Never,2011,Music,73000942 Black Swan,2010,Drama,106952327 RoboCop,2014,Action,58607007 The Godfather: Part II,1974,Drama,57300000 Save the Last Dance,2001,Drama,91038276 A Nightmare on Elm Street 4: The Dream Master,1988,Horror,49369900 Miracles from Heaven,2016,Drama,61693523 "Dude, Where's My Car?",2000,Comedy,46729374 Young Guns,1988,Western,44726644 St. Vincent,2014,Comedy,44134898 About Last Night,2014,Comedy,48637684 10 Things I Hate About You,1999,Drama,38176108 The New Guy,2002,Comedy,28972187 Loaded Weapon 1,1993,Crime,27979400 The Shallows,2016,Thriller,54257433 The Butterfly Effect,2004,Thriller,23947 Snow Day,2000,Comedy,60008303 This Christmas,2007,Romance,49121934 Baby Geniuses,1999,Crime,27141959 The Big Hit,1998,Comedy,27052167 Harriet the Spy,1996,Drama,26539321 Child's Play 2,1990,Horror,28501605 No Good Deed,2014,Crime,52543632 The Mist,2007,Horror,25592632 Ex Machina,2015,Drama,25440971 Being John Malkovich,1999,Comedy,22858926 Two Can Play That Game,2001,Comedy,22235901 Earth to Echo,2014,Family,38916903 Crazy/Beautiful,2001,Romance,16929123 Letters from Iwo Jima,2006,History,13753931 The Astronaut Farmer,2006,Drama,10996440 Room,2015,Drama,14677654 Dirty Work,1998,Comedy,9975684 Serial Mom,1994,Thriller,7881335 Dick,1999,Comedy,6241697 Light It Up,1999,Thriller,5871603 54,1998,Music,16574731 Bubble Boy,2001,Comedy,5002310 Birthday Girl,2001,Crime,4919896 21 & 
Over,2013,Comedy,25675765 "Paris, je t'aime",2006,Romance,4857376 Resurrecting the Champ,2007,Drama,3169424 Admission,2013,Romance,18004225 The Widow of Saint-Pierre,2000,Drama,3058380 Chloe,2009,Mystery,3074838 Faithful,1996,Drama,2104000 Brothers,2009,Drama,28501651 Find Me Guilty,2006,Crime,1172769 The Perks of Being a Wallflower,2012,Drama,17738570 Excessive Force,1993,Action,1200000 Infamous,2006,Crime,1150403 The Claim,2000,Drama,403932 The Vatican Tapes,2015,Thriller,1712111 Attack the Block,2011,Thriller,1024175 In the Land of Blood and Honey,2011,Drama,301305 The Call,2013,Thriller,51872378 The Crocodile Hunter: Collision Course,2002,Comedy,28399192 I Love You Phillip Morris,2009,Romance,2035566 Antwone Fisher,2002,Biography,21078145 The Emperor's Club,2002,Drama,14060950 True Romance,1993,Thriller,12281500 Glengarry Glen Ross,1992,Crime,10725228 The Killer Inside Me,2010,Drama,214966 Sorority Row,2009,Horror,11956207 Lars and the Real Girl,2007,Romance,5949693 The Boy in the Striped Pajamas,2008,Drama,9030581 Dancer in the Dark,2000,Musical,4157491 Oscar and Lucinda,1997,Romance,1508689 The Funeral,1996,Crime,1227324 Solitary Man,2009,Romance,4360548 Machete,2010,Thriller,26589953 Casino Jack,2010,Comedy,1039869 The Land Before Time,1988,Adventure,48092846 Tae Guk Gi: The Brotherhood of War,2004,Action,1110186 The Perfect Game,2009,Drama,1089445 The Exorcist,1973,Horror,204565000 Jaws,1975,Adventure,260000000 American Pie,1999,Comedy,101736215 Ernest & Celestine,2012,Crime,71442 The Golden Child,1986,Action,79817937 Think Like a Man,2012,Comedy,91547205 Barbershop,2002,Drama,75074950 Star Trek II: The Wrath of Khan,1982,Action,78900000 Ace Ventura: Pet Detective,1994,Comedy,72217000 WarGames,1983,Sci-Fi,79568000 Witness,1985,Romance,65500000 Act of Valor,2012,War,70011073 Step Up,2006,Crime,65269010 Beavis and Butt-Head Do America,1996,Crime,63071133 Jackie Brown,1997,Thriller,39647595 Harold & Kumar Escape from Guantanamo Bay,2008,Comedy,38087366 
Chronicle,2012,Sci-Fi,64572496 Yentl,1983,Drama,30400000 Time Bandits,1981,Sci-Fi,42365600 Crossroads,2002,Drama,37188667 Project X,2012,Comedy,54724272 One Hour Photo,2002,Drama,31597131 Quarantine,2008,Sci-Fi,31691811 The Eye,2008,Mystery,31397498 Johnson Family Vacation,2004,Comedy,31179516 How High,2001,Fantasy,31155435 The Muppet Christmas Carol,1992,Fantasy,27281507 Casino Royale,2006,Thriller,167007184 Frida,2002,Romance,25776062 Katy Perry: Part of Me,2012,Music,25240988 The Fault in Our Stars,2014,Romance,124868837 Rounders,1998,Crime,22905674 Top Five,2014,Romance,25277561 Stir of Echoes,1999,Mystery,21133087 Philomena,2013,Drama,37707719 The Upside of Anger,2005,Comedy,18761993 Aquamarine,2006,Romance,18595716 Paper Towns,2015,Drama,31990064 Nebraska,2013,Drama,17613460 Tales from the Crypt: Demon Knight,1995,Thriller,21088568 Max Keeble's Big Move,2001,Comedy,17292381 Young Adult,2011,Comedy,16300302 Crank,2006,Thriller,27829874 Living Out Loud,1998,Drama,12902790 Das Boot,1981,Adventure,11433134 The Alamo,2004,War,22406362 Sorority Boys,2002,Comedy,10198766 About Time,2013,Romance,15294553 House of Flying Daggers,2004,Adventure,11041228 Arbitrage,2012,Drama,7918283 Project Almanac,2015,Sci-Fi,22331028 Cadillac Records,2008,Music,8134217 Screwed,2000,Comedy,6982680 Fortress,1992,Crime,6739141 For Your Consideration,2006,Comedy,5542025 Celebrity,1998,Drama,5032496 Running with Scissors,2006,Comedy,6754898 From Justin to Kelly,2003,Musical,4922166 Girl 6,1996,Comedy,4903000 In the Cut,2003,Mystery,4717455 Two Lovers,2008,Drama,3148482 Last Orders,2001,Drama,2326407 The Host,2006,Horror,2201412 Ravenous,1999,Fantasy,2060953 Charlie Bartlett,2007,Drama,3950294 The Great Beauty,2013,Drama,2835886 The Dangerous Lives of Altar Boys,2002,Drama,1779284 Stoker,2013,Drama,1702277 2046,2004,Sci-Fi,261481 Married Life,2007,Romance,1506998 Duma,2005,Family,860002 Ondine,2009,Drama,548934 Brother,2000,Drama,447750 Welcome to Collinwood,2002,Comedy,333976 Critical 
Care,1997,Comedy,141853 The Life Before Her Eyes,2007,Drama,303439 Trade,2007,Thriller,214202 Fateless,2005,Romance,195888 Breakfast of Champions,1999,Comedy,175370 City of Life and Death,2009,War,119922 Home,2015,Adventure,177343675 5 Days of War,2011,Action,17149 Snatch,2000,Comedy,30093107 Pet Sematary,1989,Fantasy,57469179 Gremlins,1984,Horror,148170000 Star Wars: Episode IV - A New Hope,1977,Sci-Fi,460935665 Dirty Grandpa,2016,Comedy,35537564 Doctor Zhivago,1965,Drama,111722000 High School Musical 3: Senior Year,2008,Comedy,90556401 The Fighter,2010,Drama,93571803 My Cousin Vinny,1992,Comedy,52929168 If I Stay,2014,Drama,50461335 Major League,1989,Sport,49797148 Phone Booth,2002,Crime,46563158 A Walk to Remember,2002,Drama,41227069 Dead Man Walking,1995,Crime,39025000 Cruel Intentions,1999,Romance,38201895 Saw VI,2009,Mystery,27669413 The Secret Life of Bees,2008,Drama,37766350 Corky Romano,2001,Comedy,23978402 Raising Cain,1992,Drama,21370057 Invaders from Mars,1986,Horror,4884663 Brooklyn,2015,Romance,38317535 Out Cold,2001,Comedy,13903262 The Ladies Man,2000,Comedy,13592872 Quartet,2012,Drama,18381787 Tomcats,2001,Comedy,13558739 Frailty,2001,Thriller,13103828 Woman in Gold,2015,Drama,33305037 Kinsey,2004,Drama,10214647 Army of Darkness,1992,Horror,11501093 Slackers,2002,Comedy,4814244 What's Eating Gilbert Grape,1993,Drama,9170214 The Visual Bible: The Gospel of John,2003,History,4068087 Vera Drake,2004,Drama,3753806 The Guru,2002,Romance,3034181 The Perez Family,1995,Comedy,2832826 Inside Llewyn Davis,2013,Drama,13214255 O,2001,Drama,16017403 Return to the Blue Lagoon,1991,Adventure,2807854 Copying Beethoven,2006,Music,352786 Poltergeist,1982,Horror,76600000 Saw V,2008,Mystery,56729973 Jindabyne,2006,Thriller,399879 Kabhi Alvida Naa Kehna,2006,Drama,3275443 An Ideal Husband,1999,Romance,18535191 The Last Days on Mars,2013,Thriller,23838 Darkness,2002,Horror,22160085 2001: A Space Odyssey,1968,Sci-Fi,56715371 E.T. 
the Extra-Terrestrial,1982,Family,434949459 In the Land of Women,2007,Drama,11043445 For Greater Glory: The True Story of Cristiada,2012,History,5669081 Good Will Hunting,1997,Drama,138339411 Saw III,2006,Horror,80150343 Stripes,1981,Action,85300000 Bring It On,2000,Sport,68353550 The Purge: Election Year,2016,Horror,78845130 She's All That,1999,Romance,63319509 Precious,2009,Drama,47536959 Saw IV,2007,Mystery,63270259 White Noise,2005,Drama,55865715 Madea's Family Reunion,2006,Drama,63231524 The Color of Money,1986,Drama,52293982 The Mighty Ducks,1992,Sport,50752337 The Grudge,2004,Mystery,110175871 Happy Gilmore,1996,Comedy,38624000 Jeepers Creepers,2001,Horror,37470017 Bill & Ted's Excellent Adventure,1989,Comedy,40485039 Oliver!,1968,Musical,16800000 The Best Exotic Marigold Hotel,2011,Drama,46377022 Recess: School's Out,2001,Family,36696761 Mad Max Beyond Thunderdome,1985,Sci-Fi,36200000 The Boy,2016,Thriller,35794166 Devil,2010,Thriller,33583175 Friday After Next,2002,Comedy,32983713 Insidious: Chapter 3,2015,Fantasy,52200504 The Last Dragon,1985,Comedy,33000000 Snatch,2000,Crime,30093107 The Lawnmower Man,1992,Sci-Fi,32101000 Nick and Norah's Infinite Playlist,2008,Music,31487293 Dogma,1999,Adventure,30651422 The Banger Sisters,2002,Comedy,30306281 Twilight Zone: The Movie,1983,Horror,29500000 Road House,1989,Action,30050028 A Low Down Dirty Shame,1994,Comedy,29392418 Swimfan,2002,Thriller,28563926 Employee of the Month,2006,Comedy,28435406 Can't Hardly Wait,1998,Comedy,25339117 The Outsiders,1983,Crime,25600000 Sinister 2,2015,Thriller,27736779 Sparkle,2012,Music,24397469 Valentine,2001,Horror,20384136 The Fourth Kind,2009,Sci-Fi,25464480 A Prairie Home Companion,2006,Music,20338609 Sugar Hill,1993,Thriller,18272447 Rushmore,1998,Comedy,17096053 Skyline,2010,Sci-Fi,21371425 The Second Best Exotic Marigold Hotel,2015,Comedy,33071558 Kit Kittredge: An American Girl,2008,Family,17655201 The Perfect Man,2005,Romance,16247775 Mo' Better Blues,1990,Drama,16153600 
Kung Pow: Enter the Fist,2002,Action,16033556 Tremors,1990,Horror,16667084 Wrong Turn,2003,Thriller,15417771 The Corruptor,1999,Crime,15156200 Mud,2012,Drama,21589307 Reno 911!: Miami,2007,Comedy,20339754 One Direction: This Is Us,2013,Documentary,28873374 Hey Arnold! The Movie,2002,Family,13684949 My Week with Marilyn,2011,Drama,14597405 The Matador,2005,Thriller,12570442 Love Jones,1997,Drama,12514138 The Gift,2015,Mystery,43771291 End of the Spear,2005,Adventure,11703287 Get Over It,2001,Comedy,11560259 Office Space,1999,Comedy,10824921 Drop Dead Gorgeous,1999,Thriller,10561238 Big Eyes,2014,Biography,14479776 Very Bad Things,1998,Comedy,9801782 Sleepover,2004,Romance,8070311 MacGruber,2010,Action,8460995 Dirty Pretty Things,2002,Thriller,8111360 Movie 43,2013,Comedy,8828771 The Tourist,2010,Romance,67631157 Over Her Dead Body,2008,Romance,7563670 Seeking a Friend for the End of the World,2012,Adventure,6619173 American History X,1998,Drama,6712241 The Collection,2012,Thriller,6842058 Teacher's Pet,2004,Comedy,6491350 The Red Violin,1998,Romance,9473382 The Straight Story,1999,Drama,6197866 Deuces Wild,2002,Drama,6044618 Bad Words,2013,Comedy,7764027 Black or White,2014,Drama,21569041 On the Line,2001,Romance,4356743 Rescue Dawn,2006,Drama,5484375 "Jeff, Who Lives at Home",2011,Comedy,4244155 I Am Love,2009,Romance,5004648 Atlas Shrugged II: The Strike,2012,Drama,3333823 Romeo Is Bleeding,1993,Crime,3275585 The Limey,1999,Thriller,3193102 Crash,2004,Thriller,54557348 The House of Mirth,2000,Romance,3041803 Malone,1987,Thriller,3060858 Peaceful Warrior,2006,Drama,1055654 Bucky Larson: Born to Be a Star,2011,Comedy,2331318 Bamboozled,2000,Music,2185266 The Forest,2016,Thriller,26583369 Sphinx,1981,Adventure,800000 While We're Young,2014,Drama,7574066 A Better Life,2011,Drama,1754319 Spider,2002,Drama,1641788 Gun Shy,2000,Comedy,1631839 Nicholas Nickleby,2002,Drama,1309849 The Iceman,2012,Drama,1939441 Cecil B. 
DeMented,2000,Thriller,1276984 Killer Joe,2011,Romance,1987762 The Joneses,2009,Drama,1474508 Owning Mahowny,2003,Drama,1011054 The Brothers Solomon,2007,Comedy,900926 My Blueberry Nights,2007,Drama,866778 Swept Away,2002,Romance,598645 "War, Inc.",2008,Action,578527 Shaolin Soccer,2001,Action,488872 The Brown Bunny,2003,Drama,365734 Rosewater,2014,Biography,3093491 Imaginary Heroes,2004,Drama,228524 High Heels and Low Lifes,2001,Comedy,226792 Severance,2006,Thriller,136432 Edmond,2005,Drama,131617 Police Academy: Mission to Moscow,1994,Crime,126247 An Alan Smithee Film: Burn Hollywood Burn,1997,Comedy,15447 The Open Road,2009,Comedy,19348 The Good Guy,2009,Romance,100503 Motherhood,2009,Drama,92900 Blonde Ambition,2007,Comedy,5561 The Oxford Murders,2008,Thriller,3607 Eulogy,2004,Comedy,70527 "The Good, the Bad, the Weird",2008,Action,128486 The Lost City,2005,Drama,2483955 Next Friday,2000,Comedy,57176582 You Only Live Twice,1967,Adventure,43100000 Amour,2012,Drama,225377 Poltergeist III,1988,Horror,14114488 "It's a Mad, Mad, Mad, Mad World",1963,Comedy,46300000 Richard III,1995,War,2600000 Melancholia,2011,Drama,3029870 Jab Tak Hai Jaan,2012,Drama,3047539 Alien,1979,Sci-Fi,78900000 The Texas Chain Saw Massacre,1974,Horror,30859000 The Runaways,2010,Music,3571735 Fiddler on the Roof,1971,Romance,50000000 Thunderball,1965,Adventure,63600000 Set It Off,1996,Action,36049108 The Best Man,1999,Drama,34074895 Child's Play,1988,Horror,33244684 Sicko,2007,Drama,24530513 The Purge: Anarchy,2014,Horror,71519230 Down to You,2000,Romance,20035310 Harold & Kumar Go to White Castle,2004,Adventure,18225165 The Contender,2000,Drama,17804273 Boiler Room,2000,Thriller,16938179 Black Christmas,2006,Horror,16235293 Henry V,1989,War,10161099 The Way of the Gun,2000,Action,6047856 Igby Goes Down,2002,Drama,4681503 PCU,1994,Comedy,4350774 Gracie,2007,Drama,2955039 Trust the Man,2005,Romance,1530535 Hamlet 2,2008,Comedy,4881867 Glee: The 3D Concert Movie,2011,Music,11860839 The Legend 
of Suriyothai,2001,Adventure,454255 Two Evil Eyes,1990,Horror,349618 All or Nothing,2002,Drama,112935 Princess Kaiulani,2009,Drama,883887 Opal Dream,2006,Drama,13751 Flame and Citron,2008,Drama,145109 Undiscovered,2005,Comedy,1046166 Crocodile Dundee,1986,Comedy,174635000 Awake,2007,Crime,14373825 Skin Trade,2014,Action,162 Crazy Heart,2009,Drama,39462438 The Rose,1979,Romance,29200000 Baggage Claim,2013,Comedy,21564616 Election,1999,Drama,14879556 The DUFF,2015,Comedy,34017854 Glitter,2001,Drama,4273372 Bright Star,2009,Drama,4440055 My Name Is Khan,2010,Drama,4018695 Footloose,1984,Romance,80000000 Limbo,1999,Adventure,1997807 The Karate Kid,1984,Drama,90800000 Repo! The Genetic Opera,2008,Musical,140244 Pulp Fiction,1994,Drama,107930000 Nightcrawler,2014,Thriller,32279955 Club Dread,2004,Thriller,4992159 The Sound of Music,1965,Family,163214286 Splash,1984,Fantasy,69800000 Little Miss Sunshine,2006,Comedy,59889948 Stand by Me,1986,Adventure,52287414 28 Days Later...,2002,Drama,45063889 You Got Served,2004,Drama,40066497 Escape from Alcatraz,1979,Biography,36500000 Brown Sugar,2002,Comedy,27362712 A Thin Line Between Love and Hate,1996,Comedy,34746109 50/50,2011,Romance,34963967 Shutter,2008,Horror,25926543 That Awkward Moment,2014,Romance,26049082 Much Ado About Nothing,1993,Drama,22551000 On Her Majesty's Secret Service,1969,Adventure,22800000 New Nightmare,1994,Fantasy,18090181 Drive Me Crazy,1999,Comedy,17843379 Half Baked,1998,Crime,17278980 New in Town,2009,Comedy,16699684 Syriana,2005,Thriller,50815288 American Psycho,2000,Crime,15047419 The Good Girl,2002,Romance,14015786 The Boondock Saints II: All Saints Day,2009,Crime,10269307 Enough Said,2013,Comedy,17536788 Easy A,2010,Romance,58401464 Shadow of the Vampire,2000,Horror,8279017 Prom,2011,Drama,10106233 Held Up,1999,Comedy,4692814 Woman on Top,2000,Comedy,5018450 Anomalisa,2015,Animation,3442820 Another Year,2010,Comedy,3205244 8 Women,2002,Romance,3076425 Showdown in Little Tokyo,1991,Thriller,2275557 
Clay Pigeons,1998,Crime,1789892 It's Kind of a Funny Story,2010,Comedy,6350058 Made in Dagenham,2010,History,1094798 When Did You Last See Your Father?,2007,Biography,1071240 Prefontaine,1997,Biography,532190 The Secret of Kells,2009,Animation,686383 Begin Again,2013,Drama,16168741 Down in the Valley,2005,Drama,568695 Brooklyn Rules,2007,Crime,398420 The Singing Detective,2003,Comedy,336456 Fido,2006,Horror,298110 The Wendell Baker Story,2005,Comedy,127144 Wild Target,2010,Crime,117190 Pathology,2008,Horror,108662 10th & Wolf,2006,Thriller,53481 Dear Wendy,2004,Romance,23106 Akira,1988,Sci-Fi,439162 Imagine Me & You,2005,Comedy,671240 The Blood of Heroes,1989,Sci-Fi,882290 Driving Miss Daisy,1989,Drama,106593296 Soul Food,1997,Comedy,43490057 Rumble in the Bronx,1995,Action,32333860 Thank You for Smoking,2005,Comedy,24792061 Hostel: Part II,2007,Horror,17544812 An Education,2009,Drama,12574715 The Hotel New Hampshire,1984,Drama,5100000 Narc,2002,Mystery,10460089 Men with Brooms,2002,Romance,4239767 Witless Protection,2008,Crime,4131640 Extract,2009,Crime,10814185 Code 46,2003,Thriller,197148 Crash,2004,Thriller,54557348 Albert Nobbs,2011,Drama,3014541 Persepolis,2007,War,4443403 The Neon Demon,2016,Thriller,1330827 Harry Brown,2009,Action,1818681 Spider-Man 3,2007,Romance,336530303 The Omega Code,1999,Action,12610552 Juno,2007,Drama,143492840 Diamonds Are Forever,1971,Adventure,43800000 The Godfather,1972,Drama,134821952 Flashdance,1983,Music,94900000 500 Days of Summer,2009,Comedy,32391374 The Piano,1993,Drama,40158000 Magic Mike,2012,Comedy,113709992 Darkness Falls,2003,Thriller,32131483 Live and Let Die,1973,Action,35400000 My Dog Skip,2000,Family,34099640 Jumping the Broom,2011,Drama,37295394 The Great Gatsby,2013,Drama,144812796 "Good Night, and Good Luck.",2005,Drama,31501218 Capote,2005,Biography,28747570 Desperado,1995,Thriller,25625110 The Claim,2000,Western,403932 Logan's Run,1976,Sci-Fi,25000000 The Man with the Golden Gun,1974,Adventure,21000000 Action 
Jackson,1988,Comedy,20257000 The Descent,2005,Horror,26005908 Devil's Due,2014,Horror,15818967 Flirting with Disaster,1996,Comedy,14891000 The Devil's Rejects,2005,Crime,16901126 Dope,2015,Drama,17474107 In Too Deep,1999,Drama,14003141 Skyfall,2012,Thriller,304360277 House of 1000 Corpses,2003,Horror,12583510 A Serious Man,2009,Comedy,9190525 Get Low,2009,Mystery,9176553 Warlock,1989,Horror,9094451 A Single Man,2009,Drama,9166863 The Last Temptation of Christ,1988,Drama,8373585 Outside Providence,1999,Romance,7292175 Bride & Prejudice,2004,Musical,6601079 Rabbit-Proof Fence,2002,Biography,6165429 Who's Your Caddy?,2007,Comedy,5694308 Split Second,1992,Crime,5430822 The Other Side of Heaven,2001,Drama,4720371 Redbelt,2008,Sport,2344847 Cyrus,2010,Drama,7455447 A Dog of Flanders,1999,Family,2148212 Auto Focus,2002,Drama,2062066 Factory Girl,2006,Drama,1654367 We Need to Talk About Kevin,2011,Drama,1738692 The Mighty Macs,2009,Sport,1889522 Mother and Child,2009,Drama,1110286 March or Die,1977,Drama,1000000 Les visiteurs,1993,Comedy,700000 Somewhere,2010,Drama,1768416 Chairman of the Board,1998,Comedy,306715 Hesher,2010,Drama,382946 The Heart of Me,2002,Romance,196067 Freeheld,2015,Biography,532988 The Extra Man,2010,Comedy,453079 Ca$h,2010,Crime,46451 Wah-Wah,2005,Drama,233103 Pale Rider,1985,Western,41400000 Dazed and Confused,1993,Comedy,7993039 The Chumscrubber,2005,Comedy,49526 Shade,2003,Thriller,10696 House at the End of the Street,2012,Horror,31607598 Incendies,2010,Drama,6857096 "Remember Me, My Love",2003,Romance,223878 Elite Squad,2007,Crime,8060 Annabelle,2014,Horror,84263837 Bran Nue Dae,2009,Musical,110029 Boyz n the Hood,1991,Drama,57504069 La Bamba,1987,Music,54215416 Dressed to Kill,1980,Romance,31899000 The Adventures of Huck Finn,1993,Family,24103594 Go,1999,Comedy,16842303 Friends with Money,2006,Comedy,13367101 Bats,1999,Thriller,10149779 Nowhere in Africa,2001,Biography,6173485 Layer Cake,2004,Drama,2338695 The Work and the Glory II: American 
Zion,2005,Drama,2024854 The East,2013,Drama,2268296 A Home at the End of the World,2004,Romance,1029017 The Messenger,2009,Drama,66637 Control,2007,Biography,871577 The Terminator,1984,Sci-Fi,38400000 Good Bye Lenin!,2003,Drama,4063859 The Damned United,2009,Drama,449558 Mallrats,1995,Romance,2122561 Grease,1978,Romance,181360000 Platoon,1986,War,137963328 Fahrenheit 9/11,2004,Drama,119078393 Butch Cassidy and the Sundance Kid,1969,Biography,102308900 Mary Poppins,1964,Comedy,102300000 Ordinary People,1980,Drama,54800000 Around the World in 80 Days,2004,Comedy,24004159 West Side Story,1961,Romance,43650000 Caddyshack,1980,Comedy,39800000 The Brothers,2001,Drama,27457409 The Wood,1999,Romance,25047631 The Usual Suspects,1995,Crime,23272306 A Nightmare on Elm Street 5: The Dream Child,1989,Thriller,22168359 Van Wilder: Party Liaison,2002,Romance,21005329 The Wrestler,2008,Drama,26236603 Duel in the Sun,1946,Western,20400000 Best in Show,2000,Comedy,18621249 Escape from New York,1981,Sci-Fi,25244700 School Daze,1988,Comedy,14545844 Daddy Day Camp,2007,Comedy,13235267 Mystic Pizza,1988,Drama,12793213 Sliding Doors,1998,Drama,11883495 Tales from the Hood,1995,Horror,11797927 The Last King of Scotland,2006,Biography,17605861 Halloween 5,1989,Thriller,11642254 Bernie,2011,Crime,9203192 Pollock,2000,Biography,8596914 200 Cigarettes,1999,Drama,6851636 The Words,2012,Mystery,11434867 Casa de mi Padre,2012,Western,5895238 City Island,2009,Drama,6670712 The Guard,2011,Comedy,5359774 College,2008,Comedy,4693919 The Virgin Suicides,1999,Drama,4859475 Miss March,2009,Romance,4542775 Wish I Was Here,2014,Drama,3588432 Simply Irresistible,1999,Romance,4394936 Hedwig and the Angry Inch,2001,Music,3029081 Only the Strong,1993,Action,3273588 Shattered Glass,2003,Drama,2207975 Novocaine,2001,Comedy,2025238 The Wackness,2008,Romance,2077046 Beastmaster 2: Through the Portal of Time,1991,Fantasy,869325 The 5th Quarter,2010,Sport,399611 The Greatest,2009,Romance,115862 Come Early 
Morning,2006,Romance,117560 Lucky Break,2001,Romance,54606 "Surfer, Dude",2008,Comedy,36497 Deadfall,2012,Crime,65804 L'auberge espagnole,2002,Comedy,3895664 Murder by Numbers,2002,Crime,31874869 Winter in Wartime,2008,Drama,542860 The Protector,2005,Drama,11905519 Bend It Like Beckham,2002,Sport,32541719 Sunshine State,2002,Drama,3064356 Crossover,2006,Action,7009668 [Rec] 2,2009,Horror,27024 The Sting,1973,Drama,159600000 Chariots of Fire,1981,Drama,58800000 Diary of a Mad Black Woman,2005,Comedy,50382128 Shine,1996,Romance,35811509 Don Jon,2013,Romance,24475193 Ghost World,2001,Comedy,6200756 Iris,2001,Romance,1292119 The Chorus,2004,Drama,3629758 Mambo Italiano,2003,Comedy,6239558 Wonderland,2003,Thriller,1056102 Do the Right Thing,1989,Drama,27545445 Harvard Man,2001,Thriller,56007 Le Havre,2011,Comedy,611709 R100,2013,Drama,22770 Salvation Boulevard,2011,Action,27445 The Ten,2007,Romance,766487 Headhunters,2011,Drama,1196752 Saint Ralph,2004,Sport,795126 Insidious: Chapter 2,2013,Horror,83574831 Saw II,2005,Mystery,87025093 10 Cloverfield Lane,2016,Thriller,71897215 Jackass: The Movie,2002,Comedy,64267897 Lights Out,2016,Horror,56536016 Paranormal Activity 3,2011,Horror,104007828 Ouija,2014,Fantasy,50820940 A Nightmare on Elm Street 3: Dream Warriors,1987,Action,44793200 The Gift,2015,Mystery,43771291 Instructions Not Included,2013,Drama,44456509 Paranormal Activity 4,2012,Horror,53884821 The Robe,1953,History,36000000 Freddy's Dead: The Final Nightmare,1991,Thriller,34872293 Monster,2003,Crime,34468224 Paranormal Activity: The Marked Ones,2014,Thriller,32453345 Dallas Buyers Club,2013,Drama,27296514 The Lazarus Effect,2015,Sci-Fi,25799043 Memento,2000,Mystery,25530884 Oculus,2013,Horror,27689474 Clerks II,2006,Comedy,24138847 Billy Elliot,2000,Drama,21994911 The Way Way Back,2013,Drama,21501098 House Party 2,1991,Romance,19281235 Doug's 1st Movie,1999,Comedy,19421271 The Apostle,1997,Drama,20733485 Our Idiot Brother,2011,Comedy,24809547 The Players 
Club,1998,Drama,23031390 O,2001,Thriller,16017403 "As Above, So Below",2014,Horror,21197315 Addicted,2014,Drama,17382982 Eve's Bayou,1997,Drama,14821531 Still Alice,2014,Drama,18656400 Friday the 13th Part VIII: Jason Takes Manhattan,1989,Horror,14343976 My Big Fat Greek Wedding,2002,Romance,241437427 Spring Breakers,2012,Drama,14123773 Halloween: The Curse of Michael Myers,1995,Thriller,15126948 Y Tu Mamá También,2001,Adventure,13622333 Shaun of the Dead,2004,Horror,13464388 The Haunting of Molly Hartley,2008,Drama,13350177 Lone Star,1996,Mystery,13269963 Halloween 4: The Return of Michael Myers,1988,Horror,17768000 April Fool's Day,1986,Horror,12947763 Diner,1982,Comedy,14100000 Lone Wolf McQuade,1983,Action,12200000 Apollo 18,2011,Horror,17683670 Sunshine Cleaning,2008,Comedy,12055108 No Escape,2015,Action,27285953 Not Easily Broken,2009,Drama,10572742 Digimon: The Movie,2000,Sci-Fi,9628751 Saved!,2004,Drama,8786715 The Barbarian Invasions,2003,Romance,3432342 The Forsaken,2001,Thriller,6755271 UHF,1989,Drama,6157157 Slums of Beverly Hills,1998,Drama,5480318 Made,2001,Crime,5308707 Moon,2009,Mystery,5009677 The Sweet Hereafter,1997,Drama,4306697 Of Gods and Men,2010,Drama,3950029 Bottle Shock,2008,Drama,4040588 Heavenly Creatures,1994,Drama,3049135 90 Minutes in Heaven,2015,Drama,4700361 Everything Must Go,2010,Comedy,2711210 Zero Effect,1998,Comedy,1980338 The Machinist,2004,Thriller,1082044 Light Sleeper,1992,Drama,1100000 Kill the Messenger,2014,Drama,2445646 Rabbit Hole,2010,Drama,2221809 Party Monster,2003,Thriller,296665 Green Room,2015,Thriller,3219029 Bottle Rocket,1996,Drama,1040879 Albino Alligator,1996,Thriller,326308 "Lovely, Still",2008,Drama,124720 Desert Blue,1998,Drama,99147 Redacted,2007,Crime,65087 Fascination,2004,Thriller,16066 I Served the King of England,2006,Comedy,617228 Sling Blade,1996,Drama,24475416 Hostel,2005,Horror,47277326 Tristram Shandy: A Cock and Bull Story,2005,Drama,1247453 Take Shelter,2011,Thriller,1729969 Lady in 
White,1988,Mystery,1705139 The Texas Chainsaw Massacre 2,1986,Horror,8025872 Only God Forgives,2013,Drama,778565 The Names of Love,2010,Comedy,513836 Savage Grace,2007,Drama,434417 Police Academy,1984,Comedy,81200000 Four Weddings and a Funeral,1994,Romance,52700832 25th Hour,2002,Drama,13060843 Bound,1996,Thriller,3798532 Requiem for a Dream,2000,Drama,3609278 Tango,1998,Musical,1687311 Donnie Darko,2001,Thriller,727883 Character,1997,Mystery,713413 Spun,2002,Drama,410241 Lady Vengeance,2005,Crime,211667 Mean Machine,2001,Drama,92191 Exiled,2006,Action,49413 After.Life,2009,Horror,108229 One Flew Over the Cuckoo's Nest,1975,Drama,112000000 The Sweeney,2012,Action,26345 Whale Rider,2002,Drama,20772796 Pan,2015,Adventure,34964818 Night Watch,2004,Fantasy,1487477 The Crying Game,1992,Thriller,62549000 Porky's,1981,Comedy,105500000 Survival of the Dead,2009,Horror,101055 Lost in Translation,2003,Drama,44566004 Annie Hall,1977,Romance,39200000 The Greatest Show on Earth,1952,Romance,36000000 Exodus: Gods and Kings,2014,Adventure,65007045 Monster's Ball,2001,Romance,31252964 Maggie,2015,Drama,131175 Leaving Las Vegas,1995,Drama,31968347 The Boy Next Door,2015,Thriller,35385560 The Kids Are All Right,2010,Comedy,20803237 They Live,1988,Thriller,13008928 The Last Exorcism Part II,2013,Horror,15152879 Boyhood,2014,Drama,25359200 Scoop,2006,Comedy,10515579 Planet of the Apes,2001,Adventure,180011740 The Wash,2001,Comedy,10097096 3 Strikes,2000,Comedy,9821335 The Cooler,2003,Romance,8243880 The Night Listener,2006,Mystery,7825820 My Soul to Take,2010,Mystery,14637490 The Orphanage,2007,Thriller,7159147 A Haunted House 2,2014,Comedy,17314483 The Rules of Attraction,2002,Comedy,6525762 Four Rooms,1995,Comedy,4301331 Secretary,2002,Comedy,4046737 The Real Cancun,2003,Documentary,3713002 Talk Radio,1988,Drama,3468572 Waiting for Guffman,1996,Comedy,2892582 Love Stinks,1999,Comedy,2800000 You Kill Me,2007,Crime,2426851 Thumbsucker,2005,Comedy,1325073 
Mirrormask,2005,Adventure,864959 Samsara,2011,Music,2601847 The Barbarians,1987,Adventure,800000 Poolhall Junkies,2002,Drama,562059 The Loss of Sexual Innocence,1999,Drama,399793 Joe,2013,Drama,371897 Shooting Fish,1997,Crime,302204 Prison,1987,Crime,354704 Psycho Beach Party,2000,Mystery,265107 The Big Tease,1999,Comedy,185577 Trust,2010,Crime,58214 An Everlasting Piece,2000,Comedy,75078 Adore,2013,Drama,317125 Mondays in the Sun,2002,Drama,146402 Stake Land,2010,Sci-Fi,18469 The Last Time I Committed Suicide,1997,Drama,12836 Futuro Beach,2014,Drama,20262 Gone with the Wind,1939,War,198655278 Desert Dancer,2014,Drama,143653 Major Dundee,1965,Adventure,14873 Annie Get Your Gun,1950,Romance,8000000 Defendor,2009,Drama,37606 The Pirate,1948,Musical,2956000 The Good Heart,2009,Drama,19959 The History Boys,2006,Comedy,2706659 Unknown,2011,Action,61094903 The Full Monty,1997,Music,45857453 Airplane!,1980,Comedy,83400000 Friday,1995,Drama,27900000 Menace II Society,1993,Drama,27900000 Creepshow 2,1987,Horror,14000000 The Witch,2015,Mystery,25138292 I Got the Hook Up,1998,Comedy,10305534 She's the One,1996,Romance,9449219 Gods and Monsters,1998,Biography,6390032 The Secret in Their Eyes,2009,Mystery,20167424 Evil Dead II,1987,Horror,5923044 Pootie Tang,2001,Musical,3293258 La otra conquista,1998,History,886410 Trollhunter,2010,Horror,252652 Ira & Abby,2006,Romance,220234 The Watch,2012,Sci-Fi,34350553 Winter Passing,2005,Comedy,101228 D.E.B.S.,2004,Romance,96793 March of the Penguins,2005,Documentary,77413017 Margin Call,2011,Biography,5354039 Choke,2008,Drama,2926565 Whiplash,2014,Drama,13092000 City of God,2002,Drama,7563397 Human Traffic,1999,Music,104257 The Hunt,2012,Drama,610968 Bella,2006,Romance,8108247 Maria Full of Grace,2004,Drama,6517198 Beginners,2010,Drama,5776314 Animal House,1978,Comedy,141600000 Goldfinger,1964,Thriller,51100000 Trainspotting,1996,Drama,16501785 The Original Kings of Comedy,2000,Documentary,38168022 Paranormal Activity 
2,2010,Horror,84749884 Waking Ned Devine,1998,Comedy,24788807 Bowling for Columbine,2002,Drama,21244913 A Nightmare on Elm Street 2: Freddy's Revenge,1985,Fantasy,30000000 A Room with a View,1985,Romance,20966644 The Purge,2013,Horror,64423650 Sinister,2012,Horror,48056940 Martin Lawrence Live: Runteldat,2002,Comedy,19184015 Air Bud,1997,Comedy,24629916 Jason Lives: Friday the 13th Part VI,1986,Horror,19472057 The Bridge on the River Kwai,1957,War,27200000 Spaced Invaders,1990,Adventure,15369573 Jason Goes to Hell: The Final Friday,1993,Fantasy,15935068 Dave Chappelle's Block Party,2005,Documentary,11694528 Next Day Air,2009,Comedy,10017041 Phat Girlz,2006,Comedy,7059537 Before Midnight,2013,Romance,8114507 Teen Wolf Too,1987,Fantasy,7888703 Phantasm II,1988,Sci-Fi,7282851 Real Women Have Curves,2002,Comedy,5844929 East Is East,1999,Drama,4170647 Whipped,2000,Comedy,4142507 Kama Sutra: A Tale of Love,1996,Crime,4109095 Warlock: The Armageddon,1993,Fantasy,3902679 8 Heads in a Duffel Bag,1997,Crime,3559990 Thirteen Conversations About One Thing,2001,Drama,3287435 Jawbreaker,1999,Thriller,3071947 Basquiat,1996,Biography,2961991 Tsotsi,2005,Drama,2912363 DysFunktional Family,2003,Comedy,2223990 Tusk,2014,Horror,1821983 Oldboy,2003,Thriller,2181290 Letters to God,2010,Family,2848578 Hobo with a Shotgun,2011,Action,703002 Bachelorette,2012,Romance,418268 Tim and Eric's Billion Dollar Movie,2012,Comedy,200803 The Gambler,2014,Thriller,33631221 Summer Storm,2004,Sport,95016 Chain Letter,2009,Horror,143000 Just Looking,1999,Drama,39852 The Divide,2011,Thriller,22000 Alice in Wonderland,2010,Fantasy,334185206 Cinderella,2015,Fantasy,201148159 Central Station,1998,Drama,5595428 Boynton Beach Club,2005,Romance,3123749 High Tension,2003,Horror,3645438 Hustle & Flow,2005,Crime,22201636 Some Like It Hot,1959,Romance,25000000 Friday the 13th Part VII: The New Blood,1988,Horror,19170001 The Wizard of Oz,1939,Fantasy,22202612 Young Frankenstein,1974,Comedy,86300000 Diary of the 
Dead,2007,Horror,952620 Ulee's Gold,1997,Drama,9054736 Blazing Saddles,1974,Western,119500000 Friday the 13th: The Final Chapter,1984,Thriller,32600000 Maurice,1987,Romance,3130592 The Astronaut's Wife,1999,Thriller,10654581 Timecrimes,2007,Sci-Fi,38108 A Haunted House,2013,Fantasy,40041683 2016: Obama's America,2012,Documentary,33349949 Halloween II,2009,Horror,33386128 That Thing You Do!,1996,Comedy,25809813 Halloween III: Season of the Witch,1982,Mystery,14400000 Kevin Hart: Let Me Explain,2013,Comedy,32230907 My Own Private Idaho,1991,Drama,6401336 Garden State,2004,Comedy,26781723 Before Sunrise,1995,Romance,5400000 Jesus' Son,1999,Drama,1282084 Robot & Frank,2012,Crime,3325638 My Life Without Me,2003,Romance,395592 The Spectacular Now,2013,Comedy,6851969 Religulous,2008,Comedy,12995673 Fuel,2008,Documentary,173783 Dodgeball: A True Underdog Story,2004,Sport,114324072 Eye of the Dolphin,2006,Family,71904 8: The Mormon Proposition,2010,Documentary,99851 The Other End of the Line,2008,Drama,115504 Anatomy,2000,Horror,5725 Sleep Dealer,2008,Thriller,75727 Super,2010,Drama,322157 Get on the Bus,1996,Drama,5731103 Thr3e,2006,Drama,978908 This Is England,2006,Crime,327919 Go for It!,2011,Musical,178739 Friday the 13th Part III,1982,Thriller,36200000 Friday the 13th: A New Beginning,1985,Thriller,21300000 The Last Sin Eater,2007,Drama,379643 The Best Years of Our Lives,1946,Drama,23650000 Elling,2001,Comedy,313436 From Russia with Love,1963,Thriller,24800000 The Toxic Avenger Part II,1989,Comedy,792966 It Follows,2014,Horror,14673301 Mad Max 2: The Road Warrior,1981,Action,9003011 The Legend of Drunken Master,1994,Comedy,11546543 Boys Don't Cry,1999,Crime,11533945 Silent House,2011,Drama,12555230 The Lives of Others,2006,Thriller,11284657 Courageous,2011,Drama,34522221 The Triplets of Belleville,2003,Animation,7002255 Smoke Signals,1998,Comedy,6719300 Before Sunset,2004,Drama,5792822 Amores Perros,2000,Thriller,5383834 Thirteen,2003,Drama,4599680 Winter's 
Bone,2010,Drama,6531491 Me and You and Everyone We Know,2005,Comedy,3885134 We Are Your Friends,2015,Drama,3590010 Harsh Times,2005,Thriller,3335839 Captive,2015,Thriller,2557668 Full Frontal,2002,Romance,2506446 Witchboard,1986,Thriller,7369373 Hamlet,1996,Drama,4414535 Shortbus,2006,Drama,1984378 Waltz with Bashir,2008,Documentary,2283276 "The Book of Mormon Movie, Volume 1: The Journey",2003,Adventure,1098224 The Diary of a Teenage Girl,2015,Drama,1477002 In the Shadow of the Moon,2007,History,1134049 The Virginity Hit,2010,Comedy,535249 House of D,2004,Comedy,371081 Six-String Samurai,1998,Drama,124494 Saint John of Las Vegas,2009,Drama,100669 Stonewall,2015,Drama,186354 London,2005,Drama,12667 Sherrybaby,2006,Drama,198407 Stealing Harvard,2002,Crime,13973532 Gangster's Paradise: Jerusalema,2008,Drama,4958 The Lady from Shanghai,1947,Crime,7927 The Ghastly Love of Johnny X,2012,Comedy,2436 River's Edge,1986,Drama,4600000 Northfork,2003,Drama,1420578 Buried,2010,Drama,1028658 One to Another,2006,Drama,18435 Carrie,2013,Fantasy,35266619 A Nightmare on Elm Street,1984,Horror,26505000 Man on Wire,2008,Crime,2957978 Brotherly Love,2015,Drama,444044 The Last Exorcism,2010,Horror,40990055 El crimen del padre Amaro,2002,Drama,5709616 Beasts of the Southern Wild,2012,Drama,12784397 Songcatcher,2000,Music,3050934 Run Lola Run,1998,Crime,7267324 May,2002,Horror,145540 In the Bedroom,2001,Drama,35918429 I Spit on Your Grave,2010,Horror,92401 "Happy, Texas",1999,Crime,1943649 My Summer of Love,2004,Drama,992238 The Lunchbox,2013,Drama,4231500 Yes,2004,Drama,396035 Caramel,2007,Romance,1060591 Mississippi Mermaid,1969,Drama,26893 I Love Your Work,2003,Mystery,2580 Dawn of the Dead,2004,Thriller,58885635 Waitress,2007,Drama,19067631 Bloodsport,1988,Drama,11806119 The Squid and the Whale,2005,Drama,7362100 Kissing Jessica Stein,2001,Comedy,7022940 Exotica,1994,Romance,5132222 Buffalo '66,1998,Comedy,2365931 Insidious,2010,Horror,53991137 Nine Queens,2000,Drama,1221261 The 
Ballad of Jack and Rose,2005,Drama,712294 The To Do List,2013,Comedy,3447339 Killing Zoe,1993,Thriller,418953 The Believer,2001,Drama,406035 Session 9,2001,Horror,373967 I Want Someone to Eat Cheese With,2006,Romance,194568 Modern Times,1936,Drama,163245 Stolen Summer,2002,Drama,119841 My Name Is Bruce,2007,Fantasy,173066 Pontypool,2008,Fantasy,3478 Trucker,2008,Drama,52166 The Lords of Salem,2012,Drama,1163508 Jack Reacher,2012,Crime,80033643 Snow White and the Seven Dwarfs,1937,Musical,184925485 The Holy Girl,2004,Drama,304124 Incident at Loch Ness,2004,Comedy,36830 "Lock, Stock and Two Smoking Barrels",1998,Crime,3650677 The Celebration,1998,Drama,1647780 Trees Lounge,1996,Drama,695229 Journey from the Fall,2006,Drama,638951 The Basket,1999,Drama,609042 Mercury Rising,1998,Crime,32940507 The Hebrew Hammer,2003,Comedy,19539 Friday the 13th Part 2,1981,Mystery,19100000 "Sex, Lies, and Videotape",1989,Drama,24741700 Saw,2004,Mystery,55153403 Super Troopers,2001,Comedy,18488314 The Day the Earth Stood Still,2008,Sci-Fi,79363785 Monsoon Wedding,2001,Comedy,13876974 You Can Count on Me,2000,Drama,9180275 Lucky Number Slevin,2006,Crime,22494487 But I'm a Cheerleader,1999,Comedy,2199853 Home Run,2013,Sport,2859955 Reservoir Dogs,1992,Crime,2812029 "The Good, the Bad and the Ugly",1966,Western,6100000 The Second Mother,2015,Comedy,375723 Blue Like Jazz,2012,Drama,594904 Down and Out with the Dolls,2001,Music,58936 Airborne,1993,Adventure,2850263 Waiting...,2005,Comedy,16101109 From a Whisper to a Scream,1987,Horror,1400000 Beyond the Black Rainbow,2010,Sci-Fi,56129 The Raid: Redemption,2011,Thriller,4105123 Rocky,1976,Drama,117235247 The Fog,1980,Horror,21378000 Unfriended,2014,Thriller,31537320 The Howling,1981,Horror,17986000 Dr. 
No,1962,Action,16067035 Chernobyl Diaries,2012,Thriller,18112929 Hellraiser,1987,Horror,14564027 God's Not Dead 2,2016,Drama,20773070 Cry_Wolf,2005,Mystery,10042266 Godzilla 2000,1999,Thriller,10037390 Blue Valentine,2010,Romance,9701559 Transamerica,2005,Adventure,9013113 The Devil Inside,2012,Horror,53245055 Beyond the Valley of the Dolls,1970,Music,9000000 The Green Inferno,2013,Horror,7186670 The Sessions,2012,Romance,5997134 Next Stop Wonderland,1998,Romance,3386698 Juno,2007,Comedy,143492840 Frozen River,2008,Drama,2508841 20 Feet from Stardom,2013,Documentary,4946250 Two Girls and a Guy,1997,Drama,1950218 Walking and Talking,1996,Comedy,1277257 The Full Monty,1997,Comedy,45857453 Who Killed the Electric Car?,2006,Documentary,1677838 The Broken Hearts Club: A Romantic Comedy,2000,Sport,1744858 Goosebumps,2015,Horror,80021740 Slam,1998,Drama,982214 Brigham City,2001,Crime,798341 All the Real Girls,2003,Romance,548712 Dream with the Fishes,1997,Drama,464655 Blue Car,2002,Drama,464126 Wristcutters: A Love Story,2006,Drama,104077 The Battle of Shaker Heights,2003,Comedy,279282 The Lovely Bones,2009,Fantasy,43982842 The Act of Killing,2012,Documentary,484221 Taxi to the Dark Side,2007,Crime,274661 Once in a Lifetime: The Extraordinary Story of the New York Cosmos,2006,Sport,144431 Antarctica: A Year on Ice,2013,Biography,287761 Hardflip,2012,Action,96734 The House of the Devil,2009,Horror,100659 The Perfect Host,2010,Comedy,48430 Safe Men,1998,Comedy,21210 The Specials,2000,Comedy,12996 Alone with Her,2006,Crime,10018 Creative Control,2015,Drama,62480 Special,2006,Drama,6387 In Her Line of Fire,2006,Drama,721 The Jimmy Show,2001,Drama,703 Trance,2013,Mystery,2319187 On the Waterfront,1954,Romance,9600000 L!fe Happens,2011,Comedy,20186 "4 Months, 3 Weeks and 2 Days",2007,Drama,1185783 Hard Candy,2005,Thriller,1007962 The Quiet,2005,Drama,381186 Fruitvale Station,2013,Romance,16097842 The Brass Teapot,2012,Fantasy,6643 Snitch,2013,Action,42919096 Latter 
Days,2003,Drama,819939 "For a Good Time, Call...",2012,Comedy,1243961 Time Changer,2002,Fantasy,15278 A Separation,2011,Mystery,7098492 Welcome to the Dollhouse,1995,Comedy,4771000 Ruby in Paradise,1993,Romance,1001437 Raising Victor Vargas,2002,Drama,2073984 Deterrence,1999,Drama,144583 Dead Snow,2009,Comedy,41709 American Graffiti,1973,Drama,115000000 Aqua Teen Hunger Force Colon Movie Film for Theaters,2007,Sci-Fi,5518918 Safety Not Guaranteed,2012,Comedy,4007792 Kill List,2011,Crime,26297 The Innkeepers,2011,Horror,77501 The Unborn,2009,Fantasy,42638165 Interview with the Assassin,2002,Drama,47329 Donkey Punch,2008,Drama,18378 Hoop Dreams,1994,Sport,7830611 King Kong,2005,Action,218051260 House of Wax,2005,Horror,32048809 Half Nelson,2006,Drama,2694973 Top Hat,1935,Musical,3000000 The Blair Witch Project,1999,Horror,140530114 Woodstock,1970,Documentary,13300000 Mercy Streets,2000,Drama,171988 Broken Vessels,1998,Drama,13493 A Hard Day's Night,1964,Musical,515005 Fireproof,2008,Romance,33451479 Benji,1974,Adventure,39552600 Open Water,2003,Drama,30500882 Kingdom of the Spiders,1977,Horror,17000000 The Station Agent,2003,Comedy,5739376 To Save a Life,2009,Drama,3773863 Beyond the Mat,1999,Documentary,2047570 Osama,2003,Drama,1127331 Sholem Aleichem: Laughing in the Darkness,2011,Documentary,906666 Groove,2000,Music,1114943 Twin Falls Idaho,1999,Drama,985341 Mean Creek,2004,Drama,603943 Hurricane Streets,1997,Drama,334041 Never Again,2001,Comedy,295468 Civil Brand,2002,Crime,243347 Lonesome Jim,2005,Comedy,154077 Seven Samurai,1954,Drama,269061 Finishing the Game: The Search for a New Bruce Lee,2007,Comedy,52850 Rubber,2010,Comedy,98017 Home,2015,Adventure,177343675 Kiss the Bride,2007,Romance,31937 The Slaughter Rule,2002,Drama,13134 Monsters,2010,Thriller,237301 Detention of the Dead,2012,Horror,1332 Crossroads,2002,Drama,37188667 Oz the Great and Powerful,2013,Adventure,234903076 Straight Out of Brooklyn,1991,Drama,2712293 Bloody Sunday,2002,History,768045 
Conversations with Other Women,2005,Drama,379122 Poultrygeist: Night of the Chicken Dead,2006,Comedy,23000 42nd Street,1933,Comedy,2300000 Metropolitan,1990,Drama,2938208 Napoleon Dynamite,2004,Comedy,44540956 Blue Ruin,2013,Drama,258113 Paranormal Activity,2007,Horror,107917283 Monty Python and the Holy Grail,1975,Fantasy,1229197 Quinceañera,2006,Drama,1689999 Tarnation,2003,Documentary,592014 The Beyond,1981,Horror,126387 What Happens in Vegas,2008,Comedy,80276912 The Broadway Melody,1929,Musical,2808000 Maniac,2012,Horror,12843 Murderball,2005,Documentary,1523883 American Ninja 2: The Confrontation,1987,Action,4000000 Halloween,1978,Thriller,47000000 Tumbleweeds,1999,Drama,1281176 The Prophecy,1995,Thriller,16115878 When the Cat's Away,1996,Comedy,1652472 Pieces of April,2003,Drama,2360184 Old Joy,2006,Drama,255352 Wendy and Lucy,2008,Drama,856942 Fighting Tommy Riley,2004,Drama,5199 Across the Universe,2007,Musical,24343673 Locker 13,2014,Thriller,2468 Compliance,2012,Crime,318622 Chasing Amy,1997,Comedy,12006514 Lovely & Amazing,2001,Drama,4186931 Better Luck Tomorrow,2002,Romance,3799339 The Incredibly True Adventure of Two Girls in Love,1995,Comedy,1977544 Chuck & Buck,2000,Drama,1050600 American Desi,2001,Comedy,902835 Cube,1997,Mystery,489220 I Married a Strange Person!,1997,Animation,203134 November,2004,Drama,191309 Like Crazy,2011,Romance,3388210 The Canyons,2013,Thriller,49494 Burn,2012,Documentary,111300 Urbania,2000,Drama,1027119 "The Beast from 20,000 Fathoms",1953,Horror,5000000 Swingers,1996,Comedy,4505922 A Fistful of Dollars,1964,Drama,3500000 Side Effects,2013,Drama,32154410 The Trials of Darryl Hunt,2006,Documentary,1111 Children of Heaven,1997,Family,925402 Weekend,2011,Romance,469947 She's Gotta Have It,1986,Comedy,7137502 Another Earth,2011,Romance,1316074 Sweet Sweetback's Baadasssss Song,1971,Thriller,15180000 Tadpole,2000,Romance,2882062 Once,2007,Music,9437933 The Horse Boy,2009,Documentary,155984 The Texas Chain Saw 
Massacre,1974,Horror,30859000 Roger & Me,1989,Documentary,6706368 Facing the Giants,2006,Sport,10174663 The Gallows,2015,Horror,22757819 Hollywood Shuffle,1987,Comedy,5228617 The Lost Skeleton of Cadavra,2001,Horror,110536 Cheap Thrills,2013,Drama,59379 The Last House on the Left,2009,Thriller,32721635 Pi,1998,Thriller,3216970 20 Dates,1998,Comedy,536767 Super Size Me,2004,Comedy,11529368 The FP,2011,Comedy,40557 Happy Christmas,2014,Comedy,30084 The Brothers McMullen,1995,Drama,10246600 Tiny Furniture,2010,Romance,389804 George Washington,2000,Drama,241816 Smiling Fish & Goat on Fire,1999,Comedy,277233 Clerks,1994,Comedy,3151130 In the Company of Men,1997,Comedy,2856622 Sabotage,2014,Action,10499968 Slacker,1991,Drama,1227508 Clean,2004,Romance,136007 The Circle,2000,Drama,673780 Primer,2004,Thriller,424760 El Mariachi,1992,Romance,2040920 My Date with Drew,2004,Documentary,85222
================================================
FILE: R/inst/tutorials/01-playlist/playlist.R
================================================
# A flow to help you build your favorite movie playlist.

# The flow performs the following steps:
#  1) Ingests a CSV file containing metadata about movies.
#  2) Loads the CSV contents into a data frame kept on the flow.
#  3) In parallel branches:
#     - A) Filters movies by the genre parameter.
#     - B) Chooses a random movie from a different genre.
#  4) Displays the top entries from the playlist.

library(metaflow)

# Parse the CSV file.
# Reads ./movies.csv and stores the resulting data frame on the flow as `df`;
# text columns stay character vectors instead of being converted to factors.
start <- function(self) {
  self$df <- read.csv(file = "./movies.csv", stringsAsFactors = FALSE)
}

# Filter the movies by genre.
# Keeps the rows of `df` whose genre equals the `genre` value read from the
# flow and stores them, in shuffled row order, as `movies`.
pick_movie <- function(self) {
  # logical mask of the rows that have the specified genre
  in_genre <- self$df$genre == self$genre
  candidates <- self$df[in_genre, ]
  # draw a random ordering of the row positions to shuffle the selection
  row_order <- sample(nrow(candidates))
  self$movies <- candidates[row_order, ]
}

# This step chooses a random movie from a different genre.
bonus_movie <- function(self) {
  # Stores one randomly drawn title as `bonus`, taken from the rows of `df`
  # whose genre differs from the `genre` value read from the flow.

  # select all movies not matching the specified genre
  bonus_movies <- self$df[self$df$genre != self$genre, ]
  idx <- sample(nrow(bonus_movies), size = 1)
  self$bonus <- bonus_movies$movie_title[idx]
}

# Join our parallel branches and merge results.
# `inputs` is indexed here by the names of the incoming steps
# (`bonus_movie`, `pick_movie`).
join <- function(self, inputs) {
  # Reassign relevant variables from our branches.
  self$bonus <- inputs$bonus_movie$bonus
  self$playlist <- inputs$pick_movie$movies
}

# Print out the playlist: the genre, a preview of the playlist data frame,
# and at most `top_k` numbered picks.
end <- function(self) {
  message("Playlist for movies in genre: ", self$genre)
  print(head(self$playlist))

  # Bound the loop up front instead of iterating over 1:nrow(...) and breaking:
  # on an empty playlist 1:0 counts 1, 0 and would print bogus picks, and a
  # break placed after the message always prints one pick even when
  # top_k is 0. seq_len() yields an empty sequence in both cases.
  n_picks <- max(0, min(nrow(self$playlist), self$top_k))
  for (i in seq_len(n_picks)) {
    message(sprintf("Pick %d: %s", i, self$playlist$movie_title[i]))
  }
}

# Declare the flow: two parameters, then the step graph
# start -> (pick_movie, bonus_movie) -> join -> end, and run it.
metaflow("PlayListFlow") %>%
  parameter("genre",
            help = "Filter movies for a particular genre.",
            default = "Sci-Fi") %>%
  parameter("top_k",
            help = "The number of movies to recommend in the playlist.",
            default = 5,
            type = "int") %>%
  step(step = "start",
       r_function = start,
       next_step = c("pick_movie", "bonus_movie")) %>%
  step(step = "pick_movie",
       r_function = pick_movie,
       next_step = "join") %>%
  step(step = "bonus_movie",
       r_function = bonus_movie,
       next_step = "join") %>%
  step(step = "join",
       r_function = join,
       join = TRUE,
       next_step = "end") %>%
  step(step = "end",
       r_function = end) %>%
  run()

================================================
FILE: R/inst/tutorials/01-playlist/playlist.Rmd
================================================
---
title: "Episode 01-playlist: Let's build you a movie playlist"
output: html_notebook
---

PlayListFlow is a movie playlist generator, and this notebook shows how you can use the Metaflow client to access data from the versioned Metaflow runs. In this example, you can view all the historical playlists.

```{r}
suppressPackageStartupMessages(library(metaflow))
message("Current metadata provider: ", get_metadata())
message("Current namespace: ", get_namespace())
```

## Print your latest generated playlist
```{r}
flow <- flow_client$new("PlayListFlow")
run_id <- flow$latest_successful_run
message("Using run: ", run_id)
run <- run_client$new(flow, run_id)
message("Bonus pick: ", run$artifact("bonus"))
message("Playlist generated on ", run$finished_at)
message("Playlist for movies in genre: ", run$artifact("genre"))
playlist <- run$artifact("playlist")
print(head(playlist))
```

================================================
FILE: R/inst/tutorials/02-statistics/README.md
================================================
# Episode 02-statistics: Is this Data Science?

**Use Metaflow to load the movie metadata CSV file into a data frame and compute some movie genre-specific statistics. These statistics are then used in later examples to improve our playlist generator. You can optionally use the Metaflow client to eyeball the results in a Markdown Notebook, and make some simple plots.**

#### Showcasing:
- Fan-out over a set of parameters using Metaflow foreach.
- Plotting results in a Markdown Notebook.

#### Before playing this episode:
1. Configure your metadata provider to a user-wide global provider, if you haven't done so already.
```bash
$ mkdir -p /path/to/home/.metaflow
$ export METAFLOW_DEFAULT_METADATA=local
```

#### To play this episode:
##### Execute the flow:
In a terminal:
1. ```cd tutorials/02-statistics```
2. ```Rscript stats.R show```
3. ```Rscript stats.R run```

If you are using RStudio, you can run this script by directly executing `source("stats.R")`.

##### Inspect the results:
Open the R Markdown file ```stats.Rmd``` in RStudio and execute the markdown cells.
================================================ FILE: R/inst/tutorials/02-statistics/movies.csv ================================================ movie_title,title_year,genre,gross Avatar,2009,Action,760505847 Pirates of the Caribbean: At World's End,2007,Fantasy,309404152 Spectre,2015,Thriller,200074175 The Dark Knight Rises,2012,Thriller,448130642 John Carter,2012,Action,73058679 Spider-Man 3,2007,Romance,336530303 Tangled,2010,Romance,200807262 Avengers: Age of Ultron,2015,Action,458991599 Harry Potter and the Half-Blood Prince,2009,Fantasy,301956980 Batman v Superman: Dawn of Justice,2016,Adventure,330249062 Superman Returns,2006,Adventure,200069408 Quantum of Solace,2008,Action,168368427 Pirates of the Caribbean: Dead Man's Chest,2006,Action,423032628 The Lone Ranger,2013,Adventure,89289910 Man of Steel,2013,Action,291021565 The Chronicles of Narnia: Prince Caspian,2008,Family,141614023 The Avengers,2012,Adventure,623279547 Pirates of the Caribbean: On Stranger Tides,2011,Action,241063875 Men in Black 3,2012,Sci-Fi,179020854 The Hobbit: The Battle of the Five Armies,2014,Adventure,255108370 The Amazing Spider-Man,2012,Fantasy,262030663 Robin Hood,2010,Drama,105219735 The Hobbit: The Desolation of Smaug,2013,Adventure,258355354 The Golden Compass,2007,Fantasy,70083519 King Kong,2005,Drama,218051260 Titanic,1997,Drama,658672302 Captain America: Civil War,2016,Adventure,407197282 Battleship,2012,Sci-Fi,65173160 Jurassic World,2015,Thriller,652177271 Skyfall,2012,Action,304360277 Spider-Man 2,2004,Romance,373377893 Iron Man 3,2013,Adventure,408992272 Alice in Wonderland,2010,Adventure,334185206 X-Men: The Last Stand,2006,Sci-Fi,234360014 Monsters University,2013,Fantasy,268488329 Transformers: Revenge of the Fallen,2009,Adventure,402076689 Transformers: Age of Extinction,2014,Sci-Fi,245428137 Oz the Great and Powerful,2013,Family,234903076 The Amazing Spider-Man 2,2014,Fantasy,202853933 TRON: Legacy,2010,Sci-Fi,172051787 Cars 2,2011,Comedy,191450875 Green 
Lantern,2011,Action,116593191 Toy Story 3,2010,Adventure,414984497 Terminator Salvation,2009,Action,125320003 Furious 7,2015,Crime,350034110 World War Z,2013,Thriller,202351611 X-Men: Days of Future Past,2014,Fantasy,233914986 Star Trek Into Darkness,2013,Adventure,228756232 Jack the Giant Slayer,2013,Fantasy,65171860 The Great Gatsby,2013,Drama,144812796 Prince of Persia: The Sands of Time,2010,Romance,90755643 Pacific Rim,2013,Action,101785482 Transformers: Dark of the Moon,2011,Sci-Fi,352358779 Indiana Jones and the Kingdom of the Crystal Skull,2008,Action,317011114 Brave,2012,Family,237282182 Star Trek Beyond,2016,Thriller,130468626 WALL·E,2008,Animation,223806889 Rush Hour 3,2007,Action,140080850 2012,2009,Action,166112167 A Christmas Carol,2009,Fantasy,137850096 Jupiter Ascending,2015,Sci-Fi,47375327 The Legend of Tarzan,2016,Romance,124051759 "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe",2005,Adventure,291709845 X-Men: Apocalypse,2016,Adventure,154985087 The Dark Knight,2008,Thriller,533316061 Up,2009,Family,292979556 Monsters vs. Aliens,2009,Action,198332128 Iron Man,2008,Action,318298180 Hugo,2011,Family,73820094 Wild Wild West,1999,Sci-Fi,113745408 The Mummy: Tomb of the Dragon Emperor,2008,Fantasy,102176165 Suicide Squad,2016,Adventure,161087183 Evan Almighty,2007,Family,100289690 Edge of Tomorrow,2014,Adventure,100189501 Waterworld,1995,Sci-Fi,88246220 G.I. 
Joe: The Rise of Cobra,2009,Sci-Fi,150167630 Inside Out,2015,Comedy,356454367 The Jungle Book,2016,Drama,362645141 Iron Man 2,2010,Sci-Fi,312057433 Snow White and the Huntsman,2012,Action,155111815 Maleficent,2014,Fantasy,241407328 Dawn of the Planet of the Apes,2014,Drama,208543795 47 Ronin,2013,Fantasy,38297305 Captain America: The Winter Soldier,2014,Action,259746958 Shrek Forever After,2010,Animation,238371987 Tomorrowland,2015,Action,93417865 Big Hero 6,2014,Adventure,222487711 Wreck-It Ralph,2012,Sci-Fi,189412677 The Polar Express,2004,Animation,665426 Independence Day: Resurgence,2016,Adventure,102315545 How to Train Your Dragon,2010,Adventure,217387997 Terminator 3: Rise of the Machines,2003,Action,150350192 Guardians of the Galaxy,2014,Adventure,333130696 Interstellar,2014,Drama,187991439 Inception,2010,Sci-Fi,292568851 The Fast and the Furious,2001,Crime,144512310 The Curious Case of Benjamin Button,2008,Drama,127490802 X-Men: First Class,2011,Sci-Fi,146405371 The Hunger Games: Mockingjay - Part 2,2015,Sci-Fi,281666058 The Sorcerer's Apprentice,2010,Adventure,63143812 Poseidon,2006,Action,60655503 Alice Through the Looking Glass,2016,Fantasy,76846624 Shrek the Third,2007,Comedy,320706665 Warcraft,2016,Fantasy,46978995 Terminator Genisys,2015,Adventure,89732035 The Chronicles of Narnia: The Voyage of the Dawn Treader,2010,Adventure,104383624 Pearl Harbor,2001,War,198539855 Transformers,2007,Action,318759914 Alexander,2004,Biography,34293771 Harry Potter and the Order of the Phoenix,2007,Family,292000866 Harry Potter and the Goblet of Fire,2005,Family,289994397 Hancock,2008,Action,227946274 I Am Legend,2007,Sci-Fi,256386216 Charlie and the Chocolate Factory,2005,Adventure,206456431 Ratatouille,2007,Comedy,206435493 Batman Begins,2005,Adventure,205343774 Madagascar: Escape 2 Africa,2008,Comedy,179982968 Night at the Museum: Battle of the Smithsonian,2009,Comedy,177243721 X-Men Origins: Wolverine,2009,Thriller,179883016 The Matrix 
Revolutions,2003,Action,139259759 Frozen,2013,Adventure,400736600 The Matrix Reloaded,2003,Action,281492479 Thor: The Dark World,2013,Adventure,206360018 Mad Max: Fury Road,2015,Action,153629485 Angels & Demons,2009,Mystery,133375846 Thor,2011,Fantasy,181015141 Bolt,2008,Comedy,114053579 G-Force,2009,Fantasy,119420252 Wrath of the Titans,2012,Adventure,83640426 Dark Shadows,2012,Horror,79711678 Mission: Impossible - Rogue Nation,2015,Thriller,195000874 The Wolfman,2010,Drama,61937495 The Legend of Tarzan,2016,Adventure,124051759 Bee Movie,2007,Family,126597121 Kung Fu Panda 2,2011,Action,165230261 The Last Airbender,2010,Action,131564731 Mission: Impossible III,2006,Adventure,133382309 White House Down,2013,Thriller,73103784 Mars Needs Moms,2011,Family,21379315 Flushed Away,2006,Family,64459316 Pan,2015,Adventure,34964818 Mr. Peabody & Sherman,2014,Adventure,111505642 Troy,2004,Adventure,133228348 Madagascar 3: Europe's Most Wanted,2012,Family,216366733 Die Another Day,2002,Thriller,160201106 Ghostbusters,2016,Action,118099659 Armageddon,1998,Sci-Fi,201573391 Men in Black II,2002,Action,190418803 Beowulf,2007,Adventure,82161969 Kung Fu Panda 3,2016,Comedy,143523463 Mission: Impossible - Ghost Protocol,2011,Action,209364921 Rise of the Guardians,2012,Fantasy,103400692 Fun with Dick and Jane,2005,Comedy,110332737 The Last Samurai,2003,Action,111110575 Exodus: Gods and Kings,2014,Drama,65007045 Star Trek,2009,Sci-Fi,257704099 Spider-Man,2002,Romance,403706375 How to Train Your Dragon 2,2014,Action,176997107 Gods of Egypt,2016,Action,31141074 Stealth,2005,Adventure,31704416 Watchmen,2009,Mystery,107503316 Lethal Weapon 4,1998,Thriller,129734803 Hulk,2003,Sci-Fi,132122995 G.I. 
Joe: Retaliation,2013,Thriller,122512052 Sahara,2005,Comedy,68642452 Final Fantasy: The Spirits Within,2001,Animation,32131830 Captain America: The First Avenger,2011,Adventure,176636816 The World Is Not Enough,1999,Adventure,126930660 Master and Commander: The Far Side of the World,2003,Adventure,93926386 The Twilight Saga: Breaking Dawn - Part 2,2012,Drama,292298923 Happy Feet 2,2011,Musical,63992328 The Incredible Hulk,2008,Adventure,134518390 The BFG,2016,Family,52792307 The Revenant,2015,Drama,183635922 Turbo,2013,Animation,83024900 Rango,2011,Adventure,123207194 Penguins of Madagascar,2014,Animation,83348920 The Bourne Ultimatum,2007,Thriller,227137090 Kung Fu Panda,2008,Animation,215395021 Ant-Man,2015,Action,180191634 The Hunger Games: Catching Fire,2013,Thriller,424645577 The Twilight Saga: Breaking Dawn - Part 2,2012,Adventure,292298923 Home,2015,Sci-Fi,177343675 War of the Worlds,2005,Adventure,234277056 Bad Boys II,2003,Crime,138396624 Puss in Boots,2011,Family,149234747 Salt,2010,Crime,118311368 Noah,2014,Adventure,101160529 The Adventures of Tintin,2011,Action,77564037 Harry Potter and the Prisoner of Azkaban,2004,Adventure,249358727 Australia,2008,Romance,49551662 After Earth,2013,Action,60522097 Dinosaur,2000,Animation,137748063 Night at the Museum: Secret of the Tomb,2014,Fantasy,113733726 Megamind,2010,Sci-Fi,148337537 Harry Potter and the Sorcerer's Stone,2001,Adventure,317557891 R.I.P.D.,2013,Comedy,33592415 Pirates of the Caribbean: The Curse of the Black Pearl,2003,Adventure,305388685 The Hunger Games: Mockingjay - Part 1,2014,Thriller,337103873 The Da Vinci Code,2006,Thriller,217536138 Rio 2,2014,Comedy,131536019 X-Men 2,2003,Thriller,214948780 Fast Five,2011,Crime,209805005 Sherlock Holmes: A Game of Shadows,2011,Action,186830669 Clash of the Titans,2010,Fantasy,163192114 Total Recall,1990,Sci-Fi,119412921 The 13th Warrior,1999,Adventure,32694788 The Bourne Legacy,2012,Action,113165635 Batman & Robin,1997,Action,107285004 How the Grinch 
Stole Christmas,2000,Fantasy,260031035 The Day After Tomorrow,2004,Sci-Fi,186739919 Mission: Impossible II,2000,Thriller,215397307 The Perfect Storm,2000,Action,182618434 Fantastic 4: Rise of the Silver Surfer,2007,Sci-Fi,131920333 Life of Pi,2012,Adventure,124976634 Ghost Rider,2007,Fantasy,115802596 Jason Bourne,2016,Thriller,108521835 Charlie's Angels: Full Throttle,2003,Action,100685880 Prometheus,2012,Sci-Fi,126464904 Stuart Little 2,2002,Comedy,64736114 Elysium,2013,Thriller,93050117 The Chronicles of Riddick,2004,Sci-Fi,57637485 RoboCop,2014,Crime,58607007 Speed Racer,2008,Action,43929341 How Do You Know,2010,Comedy,30212620 Knight and Day,2010,Comedy,76418654 Oblivion,2013,Adventure,89021735 Star Wars: Episode III - Revenge of the Sith,2005,Sci-Fi,380262555 Star Wars: Episode II - Attack of the Clones,2002,Fantasy,310675583 "Monsters, Inc.",2001,Family,289907418 The Wolverine,2013,Thriller,132550960 Star Wars: Episode I - The Phantom Menace,1999,Adventure,474544677 The Croods,2013,Comedy,187165546 Windtalkers,2002,War,40911830 The Huntsman: Winter's War,2016,Drama,47952020 Teenage Mutant Ninja Turtles,2014,Action,190871240 Gravity,2013,Drama,274084951 Dante's Peak,1997,Thriller,67155742 Fantastic Four,2015,Action,56114221 Night at the Museum,2006,Fantasy,250863268 San Andreas,2015,Action,155181732 Tomorrow Never Dies,1997,Adventure,125332007 The Patriot,2000,Drama,113330342 Ocean's Twelve,2004,Thriller,125531634 Mr. & Mrs. 
Smith,2005,Comedy,186336103 Insurgent,2015,Adventure,129995817 The Aviator,2004,Biography,102608827 Gulliver's Travels,2010,Fantasy,42776259 The Green Hornet,2011,Comedy,98780042 300: Rise of an Empire,2014,Fantasy,106369117 The Smurfs,2011,Fantasy,142614158 Home on the Range,2004,Family,50026353 Allegiant,2016,Adventure,66002193 Real Steel,2011,Action,85463309 The Smurfs 2,2013,Fantasy,71017784 Speed 2: Cruise Control,1997,Romance,48068396 Ender's Game,2013,Action,61656849 Live Free or Die Hard,2007,Adventure,134520804 The Lord of the Rings: The Fellowship of the Ring,2001,Action,313837577 Around the World in 80 Days,2004,Action,24004159 Ali,2001,Sport,58183966 The Cat in the Hat,2003,Family,100446895 "I, Robot",2004,Action,144795350 Kingdom of Heaven,2005,History,47396698 Stuart Little,1999,Adventure,140015224 The Princess and the Frog,2009,Family,104374107 The Martian,2015,Drama,228430993 The Island,2005,Thriller,35799026 Town & Country,2001,Comedy,6712451 Gone in Sixty Seconds,2000,Crime,101643008 Gladiator,2000,Drama,187670866 Minority Report,2002,Thriller,132014112 Harry Potter and the Chamber of Secrets,2002,Family,261970615 Casino Royale,2006,Thriller,167007184 Planet of the Apes,2001,Sci-Fi,180011740 Terminator 2: Judgment Day,1991,Action,204843350 Public Enemies,2009,Romance,97030725 American Gangster,2007,Drama,130127620 True Lies,1994,Action,146282411 The Taking of Pelham 1 2 3,2009,Action,65452312 Little Fockers,2010,Romance,148383780 The Other Guys,2010,Action,119219978 Eraser,1996,Action,101228120 Django Unchained,2012,Drama,162804648 The Hunchback of Notre Dame,1996,Romance,100117603 The Emperor's New Groove,2000,Adventure,89296573 The Expendables 2,2012,Thriller,85017401 National Treasure,2004,Comedy,173005002 Eragon,2006,Action,75030163 Where the Wild Things Are,2009,Drama,77222184 Pan,2015,Family,34964818 Epic,2013,Adventure,107515297 The Tourist,2010,Thriller,67631157 End of Days,1999,Action,66862068 Blood Diamond,2006,Adventure,57366262 The 
Wolf of Wall Street,2013,Comedy,116866727 Batman Forever,1995,Adventure,184031112 Starship Troopers,1997,Sci-Fi,54700065 Cloud Atlas,2012,Sci-Fi,27098580 Legend of the Guardians: The Owls of Ga'Hoole,2010,Adventure,55673333 Catwoman,2004,Fantasy,40198710 Hercules,2014,Adventure,72660029 Treasure Planet,2002,Animation,38120554 Land of the Lost,2009,Adventure,49392095 The Expendables 3,2014,Action,39292022 Point Break,2015,Action,28772222 Son of the Mask,2005,Family,17010646 In the Heart of the Sea,2015,Action,24985612 The Adventures of Pluto Nash,2002,Sci-Fi,4411102 Green Zone,2010,Thriller,35024475 The Peanuts Movie,2015,Adventure,130174897 The Spanish Prisoner,1997,Mystery,10200000 The Mummy Returns,2001,Fantasy,202007640 Gangs of New York,2002,Drama,77679638 The Flowers of War,2011,Drama,9213 Surf's Up,2007,Comedy,58867694 The Stepford Wives,2004,Comedy,59475623 Black Hawk Down,2001,War,108638745 The Campaign,2012,Comedy,86897182 The Fifth Element,1997,Adventure,63540020 Sex and the City 2,2010,Comedy,95328937 The Road to El Dorado,2000,Comedy,50802661 Ice Age: Continental Drift,2012,Adventure,161317423 Cinderella,2015,Romance,201148159 The Lovely Bones,2009,Drama,43982842 Finding Nemo,2003,Comedy,380838870 The Lord of the Rings: The Return of the King,2003,Drama,377019252 The Lord of the Rings: The Two Towers,2002,Action,340478898 Seventh Son,2014,Adventure,17176900 Lara Croft: Tomb Raider,2001,Thriller,131144183 Transcendence,2014,Thriller,23014504 Jurassic Park III,2001,Thriller,181166115 Rise of the Planet of the Apes,2011,Action,176740650 The Spiderwick Chronicles,2008,Family,71148699 A Good Day to Die Hard,2013,Thriller,67344392 The Alamo,2004,Western,22406362 The Incredibles,2004,Adventure,261437578 Cutthroat Island,1995,Adventure,11000000 Percy Jackson & the Olympians: The Lightning Thief,2010,Family,88761720 Men in Black,1997,Family,250147615 Toy Story 2,1999,Comedy,245823397 Unstoppable,2010,Thriller,81557479 Rush Hour 2,2001,Comedy,226138454 What Lies 
Beneath,2000,Fantasy,155370362 Cloudy with a Chance of Meatballs,2009,Family,124870275 Ice Age: Dawn of the Dinosaurs,2009,Family,196573705 The Secret Life of Walter Mitty,2013,Fantasy,58229120 Charlie's Angels,2000,Action,125305545 The Departed,2006,Crime,132373442 Mulan,1998,Fantasy,120618403 Tropic Thunder,2008,Action,110416702 The Girl with the Dragon Tattoo,2011,Drama,102515793 Die Hard with a Vengeance,1995,Adventure,100012500 Sherlock Holmes,2009,Adventure,209019489 Atlantis: The Lost Empire,2001,Action,84037039 Alvin and the Chipmunks: The Road Chip,2015,Animation,85884815 Valkyrie,2008,History,83077470 You Don't Mess with the Zohan,2008,Comedy,100018837 Pixels,2015,Animation,78747585 A.I. Artificial Intelligence,2001,Drama,78616689 The Haunted Mansion,2003,Comedy,75817994 Contact,1997,Drama,100853835 Hollow Man,2000,Action,73209340 The Interpreter,2005,Crime,72515360 Percy Jackson: Sea of Monsters,2013,Fantasy,68558662 Lara Croft Tomb Raider: The Cradle of Life,2003,Fantasy,65653758 Now You See Me 2,2016,Comedy,64685359 The Saint,1997,Action,61355436 Spy Game,2001,Thriller,26871 Mission to Mars,2000,Thriller,60874615 Rio,2011,Adventure,143618384 Bicentennial Man,1999,Comedy,58220776 Volcano,1997,Action,47474112 The Devil's Own,1997,Thriller,42877165 K-19: The Widowmaker,2002,History,35168677 Fantastic Four,2015,Sci-Fi,56114221 Conan the Barbarian,1982,Fantasy,37567440 Cinderella Man,2005,Drama,61644321 The Nutcracker in 3D,2010,Fantasy,190562 Seabiscuit,2003,History,120147445 Twister,1996,Adventure,241688385 The Fast and the Furious,2001,Thriller,144512310 Cast Away,2000,Adventure,233630478 Happy Feet,2006,Music,197992827 The Bourne Supremacy,2004,Mystery,176049130 Air Force One,1997,Drama,172620724 Ocean's Eleven,2001,Crime,183405771 The Three Musketeers,2011,Romance,20315324 Hotel Transylvania,2012,Animation,148313048 Enchanted,2007,Animation,127706877 Safe House,2012,Thriller,126149655 102 Dalmatians,2000,Adventure,66941559 Tower 
Heist,2011,Action,78009155 The Holiday,2006,Romance,63224849 Enemy of the State,1998,Drama,111544445 It's Complicated,2009,Drama,112703470 Ocean's Thirteen,2007,Crime,117144465 Open Season,2006,Animation,84303558 Divergent,2014,Mystery,150832203 Enemy at the Gates,2001,War,51396781 The Rundown,2003,Action,47592825 Last Action Hero,1993,Comedy,50016394 Memoirs of a Geisha,2005,Drama,57010853 The Fast and the Furious: Tokyo Drift,2006,Action,62494975 Arthur Christmas,2011,Fantasy,46440491 Meet Joe Black,1998,Drama,44606335 Collateral Damage,2002,Drama,40048332 Mirror Mirror,2012,Adventure,64933670 Scott Pilgrim vs. the World,2010,Romance,31494270 The Core,2003,Action,31111260 Nutty Professor II: The Klumps,2000,Sci-Fi,123307945 Scooby-Doo,2002,Comedy,153288182 Dredd,2012,Action,13401683 Click,2006,Comedy,137340146 Cats & Dogs: The Revenge of Kitty Galore,2010,Action,43575716 Jumper,2008,Adventure,80170146 Hellboy II: The Golden Army,2008,Sci-Fi,75754670 Zodiac,2007,Mystery,33048353 The 6th Day,2000,Sci-Fi,34543701 Bruce Almighty,2003,Comedy,242589580 The Expendables,2010,Action,102981571 Mission: Impossible,1996,Adventure,180965237 The Hunger Games,2012,Sci-Fi,407999255 The Hangover Part II,2011,Comedy,254455986 Batman Returns,1992,Action,162831698 Over the Hedge,2006,Animation,155019340 Lilo & Stitch,2002,Family,145771527 Deep Impact,1998,Thriller,140459099 RED 2,2013,Crime,53215979 The Longest Yard,2005,Sport,158115031 Alvin and the Chipmunks: Chipwrecked,2011,Animation,133103929 Grown Ups 2,2013,Comedy,133668525 Get Smart,2008,Comedy,130313314 Something's Gotta Give,2003,Comedy,124590960 Shutter Island,2010,Mystery,127968405 Four Christmases,2008,Comedy,120136047 Robots,2005,Adventure,128200012 Face/Off,1997,Thriller,112225777 Bedtime Stories,2008,Romance,109993847 Road to Perdition,2002,Crime,104054514 Just Go with It,2011,Comedy,103028109 Con Air,1997,Action,101087161 Eagle Eye,2008,Action,101111837 Cold Mountain,2003,History,95632614 The Book of 
Eli,2010,Thriller,94822707 Flubber,1997,Sci-Fi,92969824 The Haunting,1999,Mystery,91188905 Space Jam,1996,Fantasy,90443603 The Pink Panther,2006,Comedy,82226474 The Day the Earth Stood Still,2008,Sci-Fi,79363785 Conspiracy Theory,1997,Thriller,76081498 Fury,2014,War,85707116 Six Days Seven Nights,1998,Comedy,74329966 Yogi Bear,2010,Family,100169068 Spirit: Stallion of the Cimarron,2002,Animation,73215310 Zookeeper,2011,Family,80360866 Lost in Space,1998,Action,69102910 The Manchurian Candidate,2004,Mystery,65948711 Hotel Transylvania 2,2015,Animation,169692572 Fantasia 2000,1999,Music,60507228 The Time Machine,2002,Adventure,56684819 Mighty Joe Young,1998,Thriller,50628009 Swordfish,2001,Action,69772969 The Legend of Zorro,2005,Action,45356386 What Dreams May Come,1998,Romance,55350897 Little Nicky,2000,Fantasy,39442871 The Brothers Grimm,2005,Adventure,37899638 Mars Attacks!,1996,Sci-Fi,37754208 Surrogates,2009,Sci-Fi,38542418 Thirteen Days,2000,History,34566746 Daylight,1996,Thriller,32885565 Walking with Dinosaurs 3D,2013,Animation,36073232 Battlefield Earth,2000,Adventure,21471685 Looney Tunes: Back in Action,2003,Family,20950820 Nine,2009,Romance,19673424 Timeline,2003,Adventure,19480739 The Postman,1997,Adventure,17593391 Babe: Pig in the City,1998,Fantasy,18318000 The Last Witch Hunter,2015,Fantasy,27356090 Red Planet,2000,Action,17473245 Arthur and the Invisibles,2006,Animation,15131330 Oceans,2009,Documentary,19406406 A Sound of Thunder,2005,Horror,1891821 Pompeii,2014,History,23219748 A Beautiful Mind,2001,Drama,170708996 The Lion King,1994,Animation,422783777 Journey 2: The Mysterious Island,2012,Adventure,103812241 Cloudy with a Chance of Meatballs 2,2013,Fantasy,119793567 Red Dragon,2002,Drama,92930005 Hidalgo,2004,Western,67286731 Jack and Jill,2011,Comedy,74158157 2 Fast 2 Furious,2003,Crime,127083765 The Little Prince,2015,Family,1339152 The Invasion,2007,Thriller,15071514 The Adventures of Rocky & Bullwinkle,2000,Family,26000610 The Secret Life of 
Pets,2016,Family,323505540 The League of Extraordinary Gentlemen,2003,Adventure,66462600 Despicable Me 2,2013,Sci-Fi,368049635 Independence Day,1996,Adventure,306124059 The Lost World: Jurassic Park,1997,Sci-Fi,229074524 Madagascar,2005,Comedy,193136719 Children of Men,2006,Thriller,35286428 X-Men,2000,Adventure,157299717 Wanted,2008,Action,134568845 The Rock,1996,Action,134006721 Ice Age: The Meltdown,2006,Action,195329763 50 First Dates,2004,Comedy,120776832 Hairspray,2007,Drama,118823091 Exorcist: The Beginning,2004,Mystery,41814863 Inspector Gadget,1999,Action,97360069 Now You See Me,2013,Thriller,117698894 Grown Ups,2010,Comedy,162001186 The Terminal,2004,Comedy,77032279 Hotel for Dogs,2009,Family,73023275 Vertical Limit,2000,Action,68473360 Charlie Wilson's War,2007,Comedy,66636385 Shark Tale,2004,Comedy,160762022 Dreamgirls,2006,Musical,103338338 Be Cool,2005,Crime,55808744 Munich,2005,Thriller,47379090 Tears of the Sun,2003,Action,43426961 Killers,2010,Comedy,47000485 The Man from U.N.C.L.E.,2015,Adventure,45434443 Spanglish,2004,Drama,42044321 Monster House,2006,Mystery,73661010 Bandits,2001,Comedy,41523271 First Knight,1995,Action,37600435 Anna and the King,1999,Drama,39251128 Immortals,2011,Drama,83503161 Hostage,2005,Action,34636443 Titan A.E.,2000,Adventure,22751979 Hollywood Homicide,2003,Thriller,30013346 Soldier,1998,Drama,14567883 Monkeybone,2001,Animation,5409517 Flight of the Phoenix,2004,Thriller,21009180 Unbreakable,2000,Drama,94999143 Minions,2015,Comedy,336029560 Sucker Punch,2011,Action,36381716 Snake Eyes,1998,Thriller,55585389 Sphere,1998,Drama,36976367 The Angry Birds Movie,2016,Comedy,107225164 Fool's Gold,2008,Adventure,70224196 Funny People,2009,Comedy,51814190 The Kingdom,2007,Thriller,47456450 Talladega Nights: The Ballad of Ricky Bobby,2006,Action,148213377 Dr. 
Dolittle 2,2001,Comedy,112950721 Braveheart,1995,History,75600000 Jarhead,2005,Action,62647540 The Simpsons Movie,2007,Comedy,183132370 The Majestic,2001,Drama,27796042 Driven,2001,Drama,32616869 Two Brothers,2004,Family,18947630 The Village,2004,Drama,114195633 Doctor Dolittle,1998,Comedy,144156464 Signs,2002,Sci-Fi,227965690 Shrek 2,2004,Comedy,436471036 Cars,2006,Comedy,244052771 Runaway Bride,1999,Romance,152149590 xXx,2002,Action,141204016 The SpongeBob Movie: Sponge Out of Water,2015,Family,162495848 Ransom,1996,Crime,136448821 Inglourious Basterds,2009,War,120523073 Hook,1991,Comedy,119654900 Hercules,2014,Adventure,72660029 Die Hard 2,1990,Action,117541000 S.W.A.T.,2003,Thriller,116643346 Vanilla Sky,2001,Thriller,100614858 Lady in the Water,2006,Mystery,42272747 AVP: Alien vs. Predator,2004,Thriller,80281096 Alvin and the Chipmunks: The Squeakquel,2009,Music,219613391 We Were Soldiers,2002,Action,78120196 Olympus Has Fallen,2013,Action,98895417 Star Trek: Insurrection,1998,Adventure,70117571 Battle Los Angeles,2011,Sci-Fi,83552429 Big Fish,2003,Drama,66257002 Wolf,1994,Horror,65012000 War Horse,2011,Drama,79883359 The Monuments Men,2014,War,78031620 The Abyss,1989,Thriller,54222000 Wall Street: Money Never Sleeps,2010,Drama,52474616 Dracula Untold,2014,Fantasy,55942830 The Siege,1998,Thriller,40932372 Stardust,2007,Romance,38345403 Seven Years in Tibet,1997,Drama,37901509 The Dilemma,2011,Drama,48430355 Bad Company,2002,Adventure,30157016 Doom,2005,Sci-Fi,28031250 I Spy,2002,Thriller,33105600 Underworld: Awakening,2012,Action,62321039 Rock of Ages,2012,Musical,38509342 Hart's War,2002,Drama,19076815 Killer Elite,2011,Thriller,25093607 Rollerball,2002,Sci-Fi,18990542 Ballistic: Ecks vs. 
Sever,2002,Crime,14294842 Hard Rain,1998,Drama,19819494 Osmosis Jones,2001,Adventure,13596911 Blackhat,2015,Action,7097125 Sky Captain and the World of Tomorrow,2004,Thriller,37760080 Basic Instinct 2,2006,Mystery,5851188 Escape Plan,2013,Crime,25121291 The Legend of Hercules,2014,Fantasy,18821279 The Sum of All Fears,2002,Drama,118471320 The Twilight Saga: Eclipse,2010,Fantasy,300523113 The Score,2001,Thriller,71069884 Despicable Me,2010,Family,251501645 Money Train,1995,Comedy,35324232 Ted 2,2015,Comedy,81257500 Agora,2009,History,617840 Mystery Men,1999,Fantasy,29655590 Hall Pass,2011,Comedy,45045037 The Insider,1999,Thriller,28965197 Body of Lies,2008,Drama,39380442 Abraham Lincoln: Vampire Hunter,2012,Horror,37516013 Entrapment,1999,Crime,87704396 The X Files,1998,Sci-Fi,83892374 The Last Legion,2007,Action,5932060 Saving Private Ryan,1998,Action,216119491 Need for Speed,2014,Crime,43568507 What Women Want,2000,Comedy,182805123 Ice Age,2002,Adventure,176387405 Dreamcatcher,2003,Drama,33685268 Lincoln,2012,War,182204440 The Matrix,1999,Action,171383253 Apollo 13,1995,Adventure,172071312 Total Recall,1990,Action,119412921 The Santa Clause 2,2002,Fantasy,139225854 Les Misérables,2012,Musical,148775460 You've Got Mail,1998,Romance,115731542 Step Brothers,2008,Comedy,100468793 The Mask of Zorro,1998,Adventure,93771072 Due Date,2010,Drama,100448498 Unbroken,2014,Sport,115603980 Space Cowboys,2000,Action,90454043 Cliffhanger,1993,Action,84049211 Broken Arrow,1996,Thriller,70450000 The Kid,2000,Family,69688384 World Trade Center,2006,History,70236496 Mona Lisa Smile,2003,Drama,63695760 The Dictator,2012,Romance,59617068 Eyes Wide Shut,1999,Mystery,55637680 Annie,2014,Comedy,85911262 Focus,2015,Crime,53846915 This Means War,2012,Comedy,54758461 Blade: Trinity,2004,Sci-Fi,52397389 Primary Colors,1998,Drama,38966057 Resident Evil: Retribution,2012,Action,42345531 Death Race,2008,Sci-Fi,36064910 The Long Kiss Goodnight,1996,Action,33328051 Proof of 
Life,2000,Drama,32598931 Zathura: A Space Adventure,2005,Adventure,28045540 Fight Club,1999,Drama,37023395 We Are Marshall,2006,Drama,43532294 Hudson Hawk,1991,Action,17218080 Lucky Numbers,2000,Crime,10014234 "I, Frankenstein",2014,Sci-Fi,19059018 Oliver Twist,2005,Drama,1987287 Elektra,2005,Action,24407944 Sin City: A Dame to Kill For,2014,Crime,13750556 Random Hearts,1999,Drama,31054924 Everest,2015,Biography,43247140 Perfume: The Story of a Murderer,2006,Fantasy,2208939 Austin Powers in Goldmember,2002,Comedy,213079163 Astro Boy,2009,Family,19548064 Jurassic Park,1993,Thriller,356784000 Wyatt Earp,1994,Biography,25052000 Clear and Present Danger,1994,Action,122012710 Dragon Blade,2015,Action,72413 Littleman,2006,Crime,58255287 U-571,2000,Action,77086030 The American President,1995,Comedy,65000000 The Love Guru,2008,Sport,32178777 3000 Miles to Graceland,2001,Comedy,15738632 The Hateful Eight,2015,Mystery,54116191 Blades of Glory,2007,Comedy,118153533 Hop,2011,Adventure,108012170 300,2006,Fantasy,210592590 Meet the Fockers,2004,Comedy,279167575 Marley & Me,2008,Comedy,143151473 The Green Mile,1999,Mystery,136801374 Chicken Little,2005,Animation,135381507 Gone Girl,2014,Mystery,167735396 The Bourne Identity,2002,Thriller,121468960 GoldenEye,1995,Adventure,106635996 The General's Daughter,1999,Thriller,102678089 The Truman Show,1998,Sci-Fi,125603360 The Prince of Egypt,1998,Fantasy,101217900 Daddy Day Care,2003,Comedy,104148781 2 Guns,2013,Comedy,75573300 Cats & Dogs,2001,Fantasy,93375151 The Italian Job,2003,Action,106126012 Two Weeks Notice,2002,Comedy,93307796 Antz,1998,Comedy,90646554 Couples Retreat,2009,Comedy,109176215 Days of Thunder,1990,Action,82670733 Cheaper by the Dozen 2,2005,Family,82569532 The Scorch Trials,2015,Sci-Fi,81687587 Eat Pray Love,2010,Drama,80574010 The Family Man,2000,Comedy,75764085 RED,2010,Action,90356857 Any Given Sunday,1999,Drama,75530832 The Horse Whisperer,1998,Romance,75370763 Collateral,2004,Thriller,100003492 The Scorpion 
King,2002,Action,90341670 Ladder 49,2004,Thriller,74540762 Jack Reacher,2012,Action,80033643 Deep Blue Sea,1999,Sci-Fi,73648142 This Is It,2009,Documentary,71844424 Contagion,2011,Thriller,75638743 Kangaroo Jack,2003,Comedy,66734992 Coraline,2009,Family,75280058 The Happening,2008,Thriller,64505912 Man on Fire,2004,Thriller,77862546 The Shaggy Dog,2006,Family,61112916 Starsky & Hutch,2004,Comedy,88200225 Jingle All the Way,1996,Family,60573641 Hellboy,2004,Sci-Fi,59035104 A Civil Action,1998,Drama,56702901 ParaNorman,2012,Family,55994557 The Jackal,1997,Crime,54910560 Paycheck,2003,Action,53789313 Up Close & Personal,1996,Romance,51045801 The Tale of Despereaux,2008,Animation,50818750 The Tuxedo,2002,Comedy,50189179 Under Siege 2: Dark Territory,1995,Action,50024083 Jack Ryan: Shadow Recruit,2014,Drama,50549107 Joy,2015,Comedy,56443482 London Has Fallen,2016,Drama,62401264 Alien: Resurrection,1997,Horror,47748610 Shooter,2007,Action,46975183 The Boxtrolls,2014,Family,50807639 Practical Magic,1998,Fantasy,46611204 The Lego Movie,2014,Adventure,257756197 Miss Congeniality 2: Armed and Fabulous,2005,Crime,48472213 Reign of Fire,2002,Action,43060566 Gangster Squad,2013,Drama,45996718 Year One,2009,Adventure,43337279 Invictus,2009,Drama,37479778 Duplicity,2009,Romance,40559930 My Favorite Martian,1999,Comedy,36830057 The Sentinel,2006,Thriller,36279230 Planet 51,2009,Adventure,42194060 Star Trek: Nemesis,2002,Sci-Fi,43119879 Intolerable Cruelty,2003,Romance,35096190 Edge of Darkness,2010,Mystery,43290977 The Relic,1997,Sci-Fi,33927476 Analyze That,2002,Comedy,32122249 Righteous Kill,2008,Action,40076438 Mercury Rising,1998,Action,32940507 The Soloist,2009,Biography,31670931 The Legend of Bagger Vance,2000,Fantasy,30695227 Almost Famous,2000,Music,32522352 xXx: State of the Union,2005,Crime,26082914 Priest,2011,Thriller,29136626 Sinbad: Legend of the Seven Seas,2003,Adventure,26288320 Event Horizon,1997,Horror,26616590 The Avengers,2012,Sci-Fi,623279547 
Dragonfly,2002,Fantasy,30063805 The Black Dahlia,2006,Crime,22518325 Flyboys,2006,Adventure,13082288 The Last Castle,2001,Thriller,18208078 Supernova,2000,Thriller,14218868 Winter's Tale,2014,Drama,22451 The Mortal Instruments: City of Bones,2013,Mystery,31165421 Meet Dave,2008,Romance,11802056 Dark Water,2005,Horror,25472967 Edtv,1999,Drama,22362500 Inkheart,2008,Fantasy,17281832 The Spirit,2008,Crime,19781879 Mortdecai,2015,Mystery,7605668 In the Name of the King: A Dungeon Siege Tale,2007,Action,4535117 Beyond Borders,2003,Romance,4426297 The Great Raid,2005,Drama,10166502 Deadpool,2016,Adventure,363024263 Holy Man,1998,Drama,12065985 American Sniper,2014,Biography,350123553 Goosebumps,2015,Adventure,80021740 Just Like Heaven,2005,Romance,48291624 The Flintstones in Viva Rock Vegas,2000,Sci-Fi,35231365 Rambo III,1988,Action,53715611 Leatherheads,2008,Sport,31199215 Did You Hear About the Morgans?,2009,Comedy,29580087 The Internship,2013,Comedy,44665963 Resident Evil: Afterlife,2010,Action,60128566 Red Tails,2012,History,49875589 The Devil's Advocate,1997,Mystery,60984028 That's My Boy,2012,Comedy,36931089 DragonHeart,1996,Action,51317350 After the Sunset,2004,Drama,28328132 Ghost Rider: Spirit of Vengeance,2011,Thriller,51774002 Captain Corelli's Mandolin,2001,War,25528495 The Pacifier,2005,Family,113006880 Walking Tall,2004,Crime,45860039 Forrest Gump,1994,Comedy,329691196 Alvin and the Chipmunks,2007,Family,217326336 Meet the Parents,2000,Comedy,166225040 Pocahontas,1995,Romance,141600000 Superman,1978,Action,134218018 The Nutty Professor,1996,Comedy,128769345 Hitch,2005,Comedy,177575142 George of the Jungle,1997,Action,105263257 American Wedding,2003,Romance,104354205 Captain Phillips,2013,Thriller,107100855 Date Night,2010,Romance,98711404 Casper,1995,Comedy,100328194 The Equalizer,2014,Action,101530738 Maid in Manhattan,2002,Drama,93815117 Crimson Tide,1995,Drama,91400000 The Pursuit of Happyness,2006,Drama,162586036 Flightplan,2005,Drama,89706988 
Disclosure,1994,Thriller,83000000 City of Angels,1998,Romance,78745923 Kill Bill: Vol. 1,2003,Action,70098138 Bowfinger,1999,Comedy,66365290 Kill Bill: Vol. 2,2004,Crime,66207920 Tango & Cash,1989,Thriller,63408614 Death Becomes Her,1992,Fantasy,58422650 Shanghai Noon,2000,Adventure,56932305 Executive Decision,1996,Adventure,68750000 Mr. Popper's Penguins,2011,Family,68218041 The Forbidden Kingdom,2008,Fantasy,25040293 Free Birds,2013,Animation,55747724 Alien 3,1992,Sci-Fi,55473600 Evita,1996,Biography,49994804 Ronin,1998,Thriller,41609593 The Ghost and the Darkness,1996,Adventure,38553833 Paddington,2014,Fantasy,76137505 The Watch,2012,Sci-Fi,34350553 The Hunted,2003,Drama,34238611 Instinct,1999,Thriller,34098563 Stuck on You,2003,Comedy,33828318 Semi-Pro,2008,Sport,33472850 The Pirates! Band of Misfits,2012,Animation,31051126 Changeling,2008,Mystery,35707327 Chain Reaction,1996,Action,20550712 The Fan,1996,Drama,18573791 The Phantom of the Opera,2004,Musical,51225796 Elizabeth: The Golden Age,2007,Drama,16264475 Æon Flux,2005,Sci-Fi,25857987 Gods and Generals,2003,History,12870569 Turbulence,1997,Thriller,11466088 Imagine That,2009,Family,16088610 Muppets Most Wanted,2014,Family,51178893 Thunderbirds,2004,Sci-Fi,6768055 Burlesque,2010,Music,39440655 A Very Long Engagement,2004,Romance,6167817 Blade II,2002,Action,81645152 Seven Pounds,2008,Drama,69951824 Bullet to the Head,2012,Action,9483821 The Godfather: Part III,1990,Drama,66676062 Elizabethtown,2005,Comedy,26838389 "You, Me and Dupree",2006,Comedy,75604320 Superman II,1980,Romance,108200000 Gigli,2003,Comedy,5660084 All the King's Men,2006,Drama,7221458 Shaft,2000,Thriller,70327868 Anastasia,1997,Fantasy,58297830 Moulin Rouge!,2001,Musical,57386369 Domestic Disturbance,2001,Thriller,45207112 Black Mass,2015,Crime,62563543 Flags of Our Fathers,2006,Drama,33574332 Law Abiding Citizen,2009,Crime,73343413 Grindhouse,2007,Horror,25031037 Beloved,1998,Drama,22843047 Lucky You,2007,Drama,5755286 Catch Me If You 
Can,2002,Biography,164435221 Zero Dark Thirty,2012,Drama,95720716 The Break-Up,2006,Drama,118683135 Mamma Mia!,2008,Musical,143704210 Valentine's Day,2010,Comedy,110476776 The Dukes of Hazzard,2005,Action,80270227 The Thin Red Line,1998,Drama,36385763 The Change-Up,2011,Fantasy,37035845 Man on the Moon,1999,Drama,34580635 Casino,1995,Biography,42438300 From Paris with Love,2010,Thriller,23324666 Bulletproof Monk,2003,Action,23020488 "Me, Myself & Irene",2000,Comedy,90567722 Barnyard,2006,Animation,72601713 The Twilight Saga: New Moon,2009,Fantasy,296623634 Shrek,2001,Adventure,267652016 The Adjustment Bureau,2011,Romance,62453315 Robin Hood: Prince of Thieves,1991,Romance,165500000 Jerry Maguire,1996,Sport,153620822 Ted,2012,Fantasy,218628680 As Good as It Gets,1997,Comedy,147637474 Patch Adams,1998,Drama,135014968 Anchorman 2: The Legend Continues,2013,Comedy,2175312 Mr. Deeds,2002,Comedy,126203320 Super 8,2011,Sci-Fi,126975169 Erin Brockovich,2000,Drama,125548685 How to Lose a Guy in 10 Days,2003,Romance,105807520 22 Jump Street,2014,Crime,191616238 Interview with the Vampire: The Vampire Chronicles,1994,Horror,105264608 Yes Man,2008,Comedy,97680195 Central Intelligence,2016,Comedy,126088877 Stepmom,1998,Comedy,91030827 Daddy's Home,2015,Family,150315155 Into the Woods,2014,Adventure,127997349 Inside Man,2006,Mystery,88504640 Payback,1999,Drama,81517441 Congo,1995,Mystery,81022333 Knowing,2009,Thriller,79948113 Failure to Launch,2006,Comedy,88658172 "Crazy, Stupid, Love.",2011,Romance,84244877 Garfield,2004,Comedy,75367693 Christmas with the Kranks,2004,Family,73701902 Moneyball,2011,Biography,75605492 Outbreak,1995,Thriller,67823573 Non-Stop,2014,Mystery,91439400 Race to Witch Mountain,2009,Thriller,67128202 V for Vendetta,2005,Action,70496802 Shanghai Knights,2003,Action,60470220 Curious George,2006,Adventure,58336565 Herbie Fully Loaded,2005,Sport,66002004 Don't Say a Word,2001,Crime,54997476 Hansel & Gretel: Witch Hunters,2013,Horror,55682070 
Unfaithful,2002,Thriller,52752475 I Am Number Four,2011,Action,55092830 Syriana,2005,Drama,50815288 13 Hours,2016,Drama,52822418 The Book of Life,2014,Family,50150619 Firewall,2006,Crime,48745150 Absolute Power,1997,Thriller,50007168 G.I. Jane,1997,Action,48154732 The Game,1997,Thriller,48265581 Silent Hill,2006,Mystery,46982632 The Replacements,2000,Comedy,44737059 American Reunion,2012,Comedy,56724080 The Negotiator,1998,Mystery,44484065 Into the Storm,2014,Action,47553512 Beverly Hills Cop III,1994,Thriller,42610000 Gremlins 2: The New Batch,1990,Horror,41482207 The Judge,2014,Crime,47105085 The Peacemaker,1997,Thriller,41256277 Resident Evil: Apocalypse,2004,Sci-Fi,50740078 Bridget Jones: The Edge of Reason,2004,Comedy,40203020 Out of Time,2003,Thriller,40905277 On Deadly Ground,1994,Thriller,38590500 The Adventures of Sharkboy and Lavagirl 3-D,2005,Adventure,39177541 The Beach,2000,Drama,39778599 Raising Helen,2004,Drama,37486138 Ninja Assassin,2009,Action,38105077 For Love of the Game,1999,Sport,35168395 Striptease,1996,Thriller,32800000 Marmaduke,2010,Comedy,33643461 Hereafter,2010,Drama,32741596 Murder by Numbers,2002,Crime,31874869 Assassins,1995,Crime,30306268 Hannibal Rising,2007,Drama,27667947 The Story of Us,1999,Romance,27067160 The Host,2013,Action,26616999 Basic,2003,Thriller,26536120 Blood Work,2002,Drama,26199517 The International,2009,Drama,25450527 Escape from L.A.,1996,Adventure,25407250 The Iron Giant,1999,Comedy,23159305 The Life Aquatic with Steve Zissou,2004,Drama,24006726 Free State of Jones,2016,Biography,20389967 The Life of David Gale,2003,Thriller,19593740 Man of the House,2005,Comedy,19118247 Run All Night,2015,Action,26442251 Eastern Promises,2007,Mystery,17114882 Into the Blue,2005,Thriller,18472363 The Messenger: The Story of Joan of Arc,1999,History,14131298 Your Highness,2011,Fantasy,21557240 Dream House,2011,Drama,21283440 Mad City,1997,Drama,10556196 Baby's Day Out,1994,Crime,16671505 The Scarlet Letter,1995,Romance,10400000 
Fair Game,2010,Biography,9528092 Domino,2005,Action,10137232 Jade,1995,Drama,9795017 Gamer,2009,Thriller,20488579 Beautiful Creatures,2013,Romance,19445217 Death to Smoochy,2002,Comedy,8355815 Zoolander 2,2016,Comedy,28837115 The Big Bounce,2004,Comedy,6471394 What Planet Are You From?,2000,Sci-Fi,6291602 Drive Angry,2011,Thriller,10706786 Street Fighter: The Legend of Chun-Li,2009,Crime,8742261 The One,2001,Action,43905746 The Adventures of Ford Fairlane,1990,Action,21413502 Traffic,2000,Thriller,124107476 Indiana Jones and the Last Crusade,1989,Action,197171806 Chappie,2015,Action,31569268 The Bone Collector,1999,Mystery,66488090 Panic Room,2002,Drama,95308367 Three Kings,1999,Adventure,60652036 Child 44,2015,Thriller,1206135 Rat Race,2001,Adventure,56607223 K-PAX,2001,Drama,50173190 Kate & Leopold,2001,Comedy,47095453 Bedazzled,2000,Romance,37879996 The Cotton Club,1984,Drama,25900000 3:10 to Yuma,2007,Adventure,53574088 Taken 3,2014,Action,89253340 Out of Sight,1998,Thriller,37339525 The Cable Guy,1996,Comedy,60154431 Dick Tracy,1990,Crime,103738726 The Thomas Crown Affair,1999,Crime,69304264 Riding in Cars with Boys,2001,Comedy,29781453 Happily N'Ever After,2006,Adventure,15519841 Mary Reilly,1996,Drama,5600000 My Best Friend's Wedding,1997,Comedy,126805112 America's Sweethearts,2001,Romance,93607673 Insomnia,2002,Thriller,67263182 Star Trek: First Contact,1996,Sci-Fi,92001027 Jonah Hex,2010,Fantasy,10539414 Courage Under Fire,1996,Action,58918501 Liar Liar,1997,Comedy,181395380 The Flintstones,1994,Comedy,130512915 Taken 2,2012,Thriller,139852971 Scary Movie 3,2003,Comedy,110000082 Miss Congeniality,2000,Romance,106807667 Journey to the Center of the Earth,2008,Adventure,101702060 The Princess Diaries 2: Royal Engagement,2004,Family,95149435 The Pelican Brief,1993,Mystery,100768056 The Client,1994,Drama,92115211 The Bucket List,2007,Drama,93452056 Patriot Games,1992,Thriller,83287363 Monster-in-Law,2005,Romance,82931301 Prisoners,2013,Mystery,60962878 
Training Day,2001,Thriller,76261036 Galaxy Quest,1999,Sci-Fi,71423726 Scary Movie 2,2001,Comedy,71277420 The Muppets,2011,Musical,88625922 Blade,1998,Horror,70001065 Coach Carter,2005,Drama,67253092 Changing Lanes,2002,Drama,66790248 Anaconda,1997,Adventure,65557989 Coyote Ugly,2000,Drama,60786269 Love Actually,2003,Drama,59365105 A Bug's Life,1998,Fantasy,162792677 From Hell,2001,Thriller,31598308 The Specialist,1994,Crime,57362581 Tin Cup,1996,Comedy,53854588 Kicking & Screaming,2005,Romance,52580895 The Hitchhiker's Guide to the Galaxy,2005,Adventure,51019112 Fat Albert,2004,Romance,48114556 Resident Evil: Extinction,2007,Horror,50648679 Blended,2014,Comedy,46280507 Last Holiday,2006,Adventure,38360195 The River Wild,1994,Crime,46815748 The Indian in the Cupboard,1995,Drama,35617599 Savages,2012,Drama,47307550 Cellular,2004,Crime,32003620 Johnny English,2003,Adventure,27972410 The Ant Bully,2006,Family,28133159 Dune,1984,Adventure,27400000 Across the Universe,2007,Drama,24343673 Revolutionary Road,2008,Drama,22877808 16 Blocks,2006,Drama,36883539 Babylon A.D.,2008,Sci-Fi,22531698 The Glimmer Man,1996,Comedy,20400913 Multiplicity,1996,Sci-Fi,20101861 Aliens in the Attic,2009,Sci-Fi,25200412 The Pledge,2001,Mystery,19719930 The Producers,2005,Musical,19377727 Dredd,2012,Action,13401683 The Phantom,1996,Comedy,17300889 All the Pretty Horses,2000,Western,15527125 Nixon,1995,Drama,13560960 The Ghost Writer,2010,Mystery,15523168 Deep Rising,1998,Horror,11146409 Miracle at St. 
Anna,2008,War,7916887 Curse of the Golden Flower,2006,Drama,6565495 Bangkok Dangerous,2008,Crime,15279680 Big Trouble,2002,Crime,7262288 Love in the Time of Cholera,2007,Romance,4584886 Shadow Conspiracy,1997,Thriller,2154540 Johnny English Reborn,2011,Crime,8129455 Argo,2012,Biography,136019448 The Fugitive,1993,Thriller,183875760 The Bounty Hunter,2010,Action,67061228 Sleepers,1996,Crime,53300852 Rambo: First Blood Part II,1985,Action,150415432 The Juror,1996,Thriller,44834712 Pinocchio,1940,Fantasy,84300000 Heaven's Gate,1980,Western,1500000 Underworld: Evolution,2006,Fantasy,62318875 Victor Frankenstein,2015,Thriller,5773519 Finding Forrester,2000,Drama,51768623 28 Days,2000,Comedy,37035515 Unleashed,2005,Drama,24520892 The Sweetest Thing,2002,Romance,24430272 The Firm,1993,Thriller,158348400 Charlie St. Cloud,2010,Fantasy,31136950 The Mechanic,2011,Crime,29113588 21 Jump Street,2012,Action,138447667 Notting Hill,1999,Drama,116006080 Chicken Run,2000,Animation,106793915 Along Came Polly,2004,Comedy,87856565 Boomerang,1992,Drama,70100000 The Heat,2013,Crime,159578352 Cleopatra,1963,Drama,57750000 Here Comes the Boom,2012,Sport,45290318 High Crimes,2002,Mystery,41543207 The Mirror Has Two Faces,1996,Drama,41252428 The Mothman Prophecies,2002,Horror,35228696 Brüno,2009,Comedy,59992760 Licence to Kill,1989,Thriller,34667015 Red Riding Hood,2011,Horror,37652565 15 Minutes,2001,Crime,24375436 Super Mario Bros.,1993,Fantasy,20915465 Lord of War,2005,Thriller,24127895 Hero,2002,Adventure,84961 One for the Money,2012,Comedy,26404753 The Interview,2014,Comedy,6105175 The Warrior's Way,2010,Action,5664251 Micmacs,2009,Action,1260917 8 Mile,2002,Music,116724075 A Knight's Tale,2001,Action,56083966 The Medallion,2003,Action,22108977 The Sixth Sense,1999,Mystery,293501675 Man on a Ledge,2012,Thriller,18600911 The Big Year,2011,Comedy,7204138 The Karate Kid,1984,Action,90800000 American Hustle,2013,Crime,150117807 The Proposal,2009,Drama,163947053 Double 
Jeopardy,1999,Crime,116735231 Back to the Future Part II,1989,Sci-Fi,118500000 Lucy,2014,Thriller,126546825 Fifty Shades of Grey,2015,Drama,166147885 Spy Kids 3-D: Game Over,2003,Family,111760631 A Time to Kill,1996,Drama,108706165 Cheaper by the Dozen,2003,Comedy,138614544 Lone Survivor,2013,Action,125069696 A League of Their Own,1992,Drama,107458785 The Conjuring 2,2016,Mystery,102310175 The Social Network,2010,Drama,96917897 He's Just Not That Into You,2009,Drama,93952276 Scary Movie 4,2006,Comedy,90703745 Scream 3,2000,Horror,89138076 Back to the Future Part III,1990,Western,87666629 Get Hard,2015,Comedy,90353764 Bram Stoker's Dracula,1992,Horror,82522790 Julie & Julia,2009,Biography,94125426 42,2013,Drama,95001343 The Talented Mr. Ripley,1999,Thriller,81292135 Dumb and Dumber To,2014,Comedy,86208010 Eight Below,2006,Adventure,81593527 The Intern,2015,Drama,75274748 Ride Along 2,2016,Comedy,90835030 The Last of the Mohicans,1992,Drama,72455275 Ray,2004,Drama,75305995 Sin City,2005,Crime,74098862 Vantage Point,2008,Thriller,72266306 "I Love You, Man",2009,Romance,71347010 Shallow Hal,2001,Romance,70836296 JFK,1991,History,70405498 Big Momma's House 2,2006,Comedy,70163652 The Mexican,2001,Adventure,66808615 Unbroken,2014,War,115603980 17 Again,2009,Fantasy,64149837 The Other Woman,2014,Comedy,83906114 The Final Destination,2009,Horror,66466372 Bridge of Spies,2015,Thriller,72306065 Behind Enemy Lines,2001,Drama,59068786 Shall We Dance,2004,Romance,57887882 Small Soldiers,1998,Comedy,53955614 Spawn,1997,Action,54967359 The Count of Monte Cristo,2002,Adventure,54228104 The Lincoln Lawyer,2011,Drama,57981889 Unknown,2011,Action,61094903 The Prestige,2006,Mystery,53082743 Horrible Bosses 2,2014,Comedy,54414716 Escape from Planet Earth,2013,Adventure,57011847 Apocalypto,2006,Thriller,50859889 The Living Daylights,1987,Action,51185897 Predators,2010,Action,52000688 Legal Eagles,1986,Romance,49851591 Secret Window,2004,Mystery,47781388 The Lake House,2006,Drama,52320979 
The Skeleton Key,2005,Thriller,47806295 The Odd Life of Timothy Green,2012,Comedy,51853450 Made of Honor,2008,Romance,46012734 Jersey Boys,2014,Music,47034272 The Rainmaker,1997,Drama,45856732 Gothika,2003,Thriller,59588068 Amistad,1997,History,44175394 Medicine Man,1992,Romance,45500797 Aliens vs. Predator: Requiem,2007,Horror,41797066 Ri¢hie Ri¢h,1994,Family,38087756 Autumn in New York,2000,Romance,37752931 Paul,2011,Comedy,37371385 The Guilt Trip,2012,Comedy,37101011 Scream 4,2011,Mystery,38176892 8MM,1999,Mystery,36283504 The Doors,1991,Music,35183792 Sex Tape,2014,Comedy,38543473 Hanging Up,2000,Drama,36037909 Final Destination 5,2011,Horror,42575718 Mickey Blue Eyes,1999,Romance,33864342 Pay It Forward,2000,Drama,33508922 Fever Pitch,2005,Sport,42071069 Drillbit Taylor,2008,Comedy,32853640 A Million Ways to Die in the West,2014,Western,42615685 The Shadow,1994,Adventure,32055248 Extremely Loud & Incredibly Close,2011,Mystery,31836745 Morning Glory,2010,Drama,30993544 Get Rich or Die Tryin',2005,Biography,30981850 The Art of War,2000,Adventure,30199105 Rent,2005,Drama,29077547 Bless the Child,2000,Drama,29374178 The Out-of-Towners,1999,Comedy,28535768 The Island of Dr. Moreau,1996,Sci-Fi,27663982 The Musketeer,2001,Action,27053815 The Other Boleyn Girl,2008,Drama,26814957 Sweet November,2001,Drama,25178165 The Reaping,2007,Thriller,25117498 Mean Streets,1973,Drama,32645 Renaissance Man,1994,Comedy,24332324 Colombiana,2011,Crime,36665854 The Magic Sword: Quest for Camelot,1998,Family,22717758 City by the Sea,2002,Thriller,22433915 At First Sight,1999,Drama,22326247 Torque,2004,Comedy,21176322 City Hall,1996,Drama,20300000 Marie Antoinette,2006,Drama,15962471 Kiss of Death,1995,Thriller,14942422 Get Carter,2000,Drama,14967182 The Impossible,2012,Thriller,18996755 Ishtar,1987,Action,14375181 Fantastic Mr. 
Fox,2009,Crime,20999103 Life or Something Like It,2002,Romance,14448589 Memoirs of an Invisible Man,1992,Comedy,14358033 Amélie,2001,Comedy,33201661 New York Minute,2004,Comedy,14018364 Alfie,2004,Romance,13395939 Big Miracle,2012,Romance,20113965 The Deep End of the Ocean,1999,Drama,13376506 Feardotcom,2002,Thriller,13208023 Cirque du Freak: The Vampire's Assistant,2009,Fantasy,13838130 Victor Frankenstein,2015,Horror,5773519 Duplex,2003,Comedy,9652000 Raise the Titanic,1980,Adventure,7000000 Universal Soldier: The Return,1999,Action,10431220 Pandorum,2009,Action,10326062 Impostor,2001,Mystery,6114237 Extreme Ops,2002,Thriller,4835968 Just Visiting,2001,Fantasy,4777007 Sunshine,2007,Thriller,3675072 A Thousand Words,2012,Drama,18438149 Delgo,2008,Adventure,511920 The Gunman,2015,Action,10640645 Alex Rider: Operation Stormbreaker,2006,Adventure,652526 Disturbia,2007,Drama,80050171 Hackers,1995,Thriller,7564000 The Hunting Party,2007,Thriller,876671 The Hudsucker Proxy,1994,Fantasy,2869369 The Warlords,2007,History,128978 Nomad: The Warrior,2005,War,77231 Snowpiercer,2013,Thriller,4563029 The Crow,1994,Fantasy,50693162 The Time Traveler's Wife,2009,Fantasy,63411478 The Fast and the Furious,2001,Crime,144512310 Frankenweenie,2012,Horror,35287788 Serenity,2005,Thriller,25335935 Against the Ropes,2004,Romance,5881504 Superman III,1983,Sci-Fi,60000000 Grudge Match,2013,Comedy,29802761 Red Cliff,2008,History,626809 Sweet Home Alabama,2002,Romance,127214072 The Ugly Truth,2009,Romance,88915214 Sgt. 
Bilko,1996,Comedy,30400000 Spy Kids 2: Island of Lost Dreams,2002,Action,85570368 Star Trek: Generations,1994,Thriller,75668868 The Grandmaster,2013,Drama,6594136 Water for Elephants,2011,Romance,58700247 The Hurricane,1999,Drama,50668906 Enough,2002,Crime,39177215 Heartbreakers,2001,Crime,40334024 Paul Blart: Mall Cop 2,2015,Action,71038190 Angel Eyes,2001,Drama,24044532 Joe Somebody,2001,Comedy,22770864 The Ninth Gate,1999,Thriller,18653746 Extreme Measures,1996,Thriller,17305211 Rock Star,2001,Drama,16991902 Precious,2009,Drama,47536959 White Squall,1996,Adventure,10300000 The Thing,1982,Mystery,13782838 Riddick,2013,Action,41997790 Switchback,1997,Mystery,6482195 Texas Rangers,2001,Action,623374 City of Ember,2008,Family,7871693 The Master,2012,Drama,16377274 The Express,2008,Drama,9589875 The 5th Wave,2016,Thriller,34912982 Creed,2015,Sport,109712885 The Town,2010,Thriller,92173235 What to Expect When You're Expecting,2012,Comedy,41102171 Burn After Reading,2008,Drama,60338891 Nim's Island,2008,Adventure,48006503 Rush,2013,Action,26903709 Magnolia,1999,Drama,22450975 Cop Out,2010,Crime,44867349 How to Be Single,2016,Romance,46813366 Dolphin Tale,2011,Drama,72279690 Twilight,2008,Romance,191449475 John Q,2002,Thriller,71026631 Blue Streak,1999,Thriller,68208190 We're the Millers,2013,Comedy,150368971 Breakdown,1997,Thriller,50129186 Never Say Never Again,1983,Action,55500000 Hot Tub Time Machine,2010,Sci-Fi,50213619 Dolphin Tale 2,2014,Family,42019483 Reindeer Games,2000,Family,23360779 A Man Apart,2003,Action,26183197 Aloha,2015,Drama,20991497 Ghosts of Mississippi,1996,Drama,13052741 Snow Falling on Cedars,1999,Drama,14378353 The Rite,2011,Mystery,33037754 Gattaca,1997,Drama,12339633 Isn't She Great,2000,Biography,2954405 Space Chimps,2008,Animation,30105968 Head of State,2003,Comedy,37788228 The Hangover,2009,Comedy,277313371 Ip Man 3,2015,History,2126511 Austin Powers: The Spy Who Shagged Me,1999,Comedy,205399422 Batman,1989,Action,251188924 There Be 
Dragons,2011,War,1068392 Lethal Weapon 3,1992,Crime,144731527 The Blind Side,2009,Biography,255950375 Spy Kids,2001,Adventure,112692062 Horrible Bosses,2011,Crime,117528646 True Grit,2010,Adventure,171031347 The Devil Wears Prada,2006,Comedy,124732962 Star Trek: The Motion Picture,1979,Mystery,82300000 Identity Thief,2013,Comedy,134455175 Cape Fear,1991,Thriller,79100000 21,2008,Thriller,81159365 Trainwreck,2015,Romance,110008260 Guess Who,2005,Comedy,67962333 The English Patient,1996,War,78651430 L.A. Confidential,1997,Crime,64604977 Sky High,2005,Comedy,63939454 In & Out,1997,Comedy,63826569 Species,1995,Thriller,60054449 A Nightmare on Elm Street,1984,Horror,26505000 The Cell,2000,Horror,61280963 The Man in the Iron Mask,1998,Action,56876365 Secretariat,2010,Sport,59699513 TMNT,2007,Comedy,54132596 Radio,2003,Sport,52277485 Friends with Benefits,2011,Comedy,55802754 Neighbors 2: Sorority Rising,2016,Comedy,55291815 Saving Mr. Banks,2013,History,83299761 Malcolm X,1992,History,48169908 This Is 40,2012,Comedy,67523385 Old Dogs,2009,Comedy,49474048 Underworld: Rise of the Lycans,2009,Fantasy,45802315 License to Wed,2007,Comedy,43792641 The Benchwarmers,2006,Sport,57651794 Must Love Dogs,2005,Romance,43894863 Donnie Brasco,1997,Crime,41954997 Resident Evil,2002,Horror,39532308 Poltergeist,1982,Fantasy,76600000 The Ladykillers,2004,Comedy,39692139 Max Payne,2008,Crime,40687294 In Time,2011,Thriller,37553932 The Back-up Plan,2010,Comedy,37481242 Something Borrowed,2011,Comedy,39026186 Black Knight,2001,Adventure,33422806 Street Fighter,1994,Action,33423521 The Pianist,2002,War,32519322 From Hell,2001,Thriller,31598308 The Nativity Story,2006,Drama,37617947 House of Wax,2005,Horror,32048809 Closer,2004,Drama,33987757 J. 
Edgar,2011,Drama,37304950 Mirrors,2008,Horror,30691439 Queen of the Damned,2002,Horror,30307804 Predator 2,1990,Sci-Fi,30669413 Untraceable,2008,Crime,28687835 Blast from the Past,1999,Comedy,26494611 Jersey Girl,2004,Comedy,25266129 Alex Cross,2012,Thriller,25863915 Midnight in the Garden of Good and Evil,1997,Mystery,25078937 Nanny McPhee Returns,2010,Fantasy,28995450 Hoffa,1992,Biography,24276500 The X Files: I Want to Believe,2008,Drama,20981633 Ella Enchanted,2004,Fantasy,22913677 Concussion,2015,Drama,34531832 Abduction,2011,Thriller,28064226 Valiant,2005,Adventure,19447478 Wonder Boys,2000,Drama,19389454 Superhero Movie,2008,Sci-Fi,25871834 Broken City,2013,Thriller,19692608 Cursed,2005,Comedy,19294901 Premium Rush,2012,Action,20275446 Hot Pursuit,2015,Comedy,34507079 The Four Feathers,2002,Romance,18306166 Parker,2013,Action,17609982 Wimbledon,2004,Romance,16831505 Furry Vengeance,2010,Family,17596256 Lions for Lambs,2007,Thriller,14998070 Flight of the Intruder,1991,Action,14587732 Walk Hard: The Dewey Cox Story,2007,Comedy,18317151 The Shipping News,2001,Drama,11405825 American Outlaws,2001,Action,13264986 The Young Victoria,2009,History,10991381 Whiteout,2009,Action,10268846 The Tree of Life,2011,Drama,13303319 Knock Off,1998,Action,10076136 Sabotage,2014,Action,10499968 The Order,2003,Mystery,7659747 Punisher: War Zone,2008,Action,7948159 Zoom,2006,Family,11631245 The Walk,2015,Biography,10137502 Warriors of Virtue,1997,Action,6448817 A Good Year,2006,Comedy,7458269 Radio Flyer,1992,Drama,4651977 "Blood In, Blood Out",1993,Drama,4496583 Smilla's Sense of Snow,1997,Thriller,2221994 Femme Fatale,2002,Thriller,6592103 Ride with the Devil,1999,War,630779 The Maze Runner,2014,Thriller,102413606 Unfinished Business,2015,Comedy,10214013 The Age of Innocence,1993,Romance,32000000 The Fountain,2006,Drama,10139254 Chill Factor,1999,Comedy,11227940 Stolen,2012,Thriller,183125 Ponyo,2008,Fantasy,15081783 The Longest Ride,2015,Romance,37432299 The Astronaut's 
Wife,1999,Sci-Fi,10654581 I Dreamed of Africa,2000,Romance,6543194 Playing for Keeps,2012,Romance,13101142 Mandela: Long Walk to Freedom,2013,Biography,8324748 A Few Good Men,1992,Drama,141340178 Exit Wounds,2001,Drama,51758599 Big Momma's House,2000,Comedy,117559438 The Darkest Hour,2011,Thriller,21426805 Step Up Revolution,2012,Romance,35057332 Snakes on a Plane,2006,Action,34014398 The Watcher,2000,Horror,28927720 The Punisher,2004,Crime,33682273 Goal! The Dream Begins,2005,Romance,4280577 Safe,2012,Crime,17120019 Pushing Tin,1999,Comedy,8406264 Star Wars: Episode VI - Return of the Jedi,1983,Sci-Fi,309125409 Doomsday,2008,Action,10955425 The Reader,2008,Romance,34180954 Elf,2003,Family,173381405 Phenomenon,1996,Fantasy,104632573 Snow Dogs,2002,Comedy,81150692 Scrooged,1988,Drama,60328558 Nacho Libre,2006,Comedy,80197993 Bridesmaids,2011,Romance,169076745 This Is the End,2013,Fantasy,101470202 Stigmata,1999,Horror,50041732 Men of Honor,2000,Biography,48814909 Takers,2010,Crime,57744720 The Big Wedding,2013,Comedy,21784432 "Big Mommas: Like Father, Like Son",2011,Comedy,37911876 Source Code,2011,Mystery,54696902 Alive,1993,Adventure,36733909 The Number 23,2007,Thriller,35063732 The Young and Prodigious T.S. Spivet,2013,Family,99462 Dreamer: Inspired by a True Story,2005,Drama,32701088 A History of Violence,2005,Crime,31493782 Transporter 2,2005,Crime,43095600 The Quick and the Dead,1995,Thriller,18636537 Laws of Attraction,2004,Comedy,17848322 Bringing Out the Dead,1999,Drama,16640210 Repo Men,2010,Thriller,13763130 Dragon Wars: D-War,2007,Horror,10956379 Bogus,1996,Fantasy,4357000 The Incredible Burt Wonderstone,2013,Comedy,22525921 Cats Don't Dance,1997,Fantasy,3562749 Cradle Will Rock,1999,Drama,2899970 The Good German,2006,Thriller,1304837 Apocalypse Now,1979,War,78800000 Going the Distance,2010,Comedy,17797316 Mr. 
Holland's Opus,1995,Drama,82528097 Criminal,2016,Thriller,14268533 Out of Africa,1985,Romance,87100000 Flight,2012,Thriller,93749203 Moonraker,1979,Sci-Fi,62700000 The Grand Budapest Hotel,2014,Crime,59073773 Hearts in Atlantis,2001,Mystery,24185781 Arachnophobia,1990,Fantasy,53133888 Frequency,2000,Sci-Fi,44983704 Ghostbusters,2016,Fantasy,118099659 Vacation,2015,Comedy,58879132 Get Shorty,1995,Crime,72077000 Chicago,2002,Musical,170684505 Big Daddy,1999,Comedy,163479795 American Pie 2,2001,Comedy,145096820 Toy Story,1995,Comedy,191796233 Speed,1994,Thriller,121248145 The Vow,2012,Drama,125014030 Extraordinary Measures,2010,Drama,11854694 Remember the Titans,2000,Biography,115648585 The Hunt for Red October,1990,Action,122012643 Lee Daniels' The Butler,2013,Biography,116631310 Dodgeball: A True Underdog Story,2004,Comedy,114324072 The Addams Family,1991,Fantasy,113502246 Ace Ventura: When Nature Calls,1995,Comedy,108360000 The Princess Diaries,2001,Comedy,108244774 The First Wives Club,1996,Comedy,105444419 Se7en,1995,Crime,100125340 District 9,2009,Sci-Fi,115646235 The SpongeBob SquarePants Movie,2004,Animation,85416609 Mystic River,2003,Mystery,90135191 Million Dollar Baby,2004,Sport,100422786 Analyze This,1999,Crime,106694016 The Notebook,2004,Drama,64286 27 Dresses,2008,Romance,76806312 Hannah Montana: The Movie,2009,Romance,79566871 Rugrats in Paris: The Movie,2000,Comedy,76501438 The Prince of Tides,1991,Romance,74787599 Legends of the Fall,1994,War,66528842 Up in the Air,2009,Romance,83813460 About Schmidt,2002,Comedy,65010106 Warm Bodies,2013,Romance,66359959 Looper,2012,Crime,66468315 Down to Earth,2001,Comedy,64172251 Babe,1995,Drama,66600000 Hope Springs,2012,Romance,63536011 Forgetting Sarah Marshall,2008,Romance,62877175 Four Brothers,2005,Thriller,74484168 Baby Mama,2008,Comedy,60269340 Hope Floats,1998,Romance,60033780 Bride Wars,2009,Comedy,58715510 Without a Paddle,2004,Adventure,58156435 13 Going on 30,2004,Romance,56044241 Midnight in 
Paris,2011,Comedy,56816662 The Nut Job,2014,Adventure,64238770 Blow,2001,Drama,52937130 Message in a Bottle,1999,Drama,52799004 Star Trek V: The Final Frontier,1989,Thriller,55210049 Like Mike,2002,Sport,51432423 Naked Gun 33 1/3: The Final Insult,1994,Crime,51109400 A View to a Kill,1985,Adventure,50300000 The Curse of the Were-Rabbit,2005,Mystery,56068547 P.S. I Love You,2007,Drama,53680848 Atonement,2007,Mystery,50921738 Letters to Juliet,2010,Romance,53021560 Black Rain,1989,Action,45645204 Corpse Bride,2005,Romance,53337608 Sicario,2015,Mystery,46875468 Southpaw,2015,Drama,52418902 Drag Me to Hell,2009,Thriller,42057340 The Age of Adaline,2015,Drama,42478175 Secondhand Lions,2003,Drama,41407470 Step Up 3D,2010,Music,42385520 Blue Crush,2002,Romance,40118420 Stranger Than Fiction,2006,Fantasy,40137776 30 Days of Night,2007,Horror,39568996 The Cabin in the Woods,2012,Fantasy,42043633 Meet the Spartans,2008,Comedy,38232624 Midnight Run,1988,Action,38413606 The Running Man,1987,Action,38122105 Little Shop of Horrors,1986,Sci-Fi,38747385 Hanna,2011,Thriller,40247512 Mortal Kombat: Annihilation,1997,Fantasy,35927406 Larry Crowne,2011,Comedy,35565975 Carrie,2013,Horror,35266619 Take the Lead,2006,Music,34703228 Gridiron Gang,2006,Sport,38432823 What's the Worst That Could Happen?,2001,Crime,32095318 9,2009,Mystery,31743332 Side Effects,2013,Crime,32154410 Winnie the Pooh,2011,Animation,26687172 Dumb and Dumberer: When Harry Met Lloyd,2003,Comedy,26096584 Bulworth,1998,Drama,26525834 Get on Up,2014,Biography,30513940 One True Thing,1998,Drama,23209440 Virtuosity,1995,Thriller,24048000 My Super Ex-Girlfriend,2006,Sci-Fi,22526144 Deliver Us from Evil,2014,Thriller,30523568 Sanctum,2011,Adventure,23070045 Little Black Book,2004,Comedy,20422207 The Five-Year Engagement,2012,Romance,28644770 Mr 3000,2004,Drama,21800302 The Next Three Days,2010,Drama,21129348 Ultraviolet,2006,Thriller,18500966 Assault on Precinct 13,2005,Action,19976073 The Replacement 
Killers,1998,Thriller,18967571 Fled,1996,Romance,17100000 Eight Legged Freaks,2002,Horror,17266505 Love & Other Drugs,2010,Comedy,32357532 88 Minutes,2007,Thriller,16930884 North Country,2005,Drama,18324242 The Whole Ten Yards,2004,Thriller,16323969 Splice,2009,Sci-Fi,16999046 Howard the Duck,1986,Romance,16295774 Pride and Glory,2008,Crime,15709385 The Cave,2005,Thriller,14888028 Alex & Emma,2003,Comedy,14208384 Wicker Park,2004,Thriller,12831121 Fright Night,2011,Horror,18298649 The New World,2005,History,12712093 Wing Commander,1999,Sci-Fi,11576087 In Dreams,1999,Thriller,11900000 Dragonball: Evolution,2009,Thriller,9353573 The Last Stand,2013,Crime,12026670 Godsend,2004,Drama,14334645 Chasing Liberty,2004,Romance,12189514 Hoodwinked Too! Hood vs. Evil,2011,Animation,10134754 An Unfinished Life,2005,Drama,8535575 The Imaginarium of Doctor Parnassus,2009,Fantasy,7689458 Runner Runner,2013,Crime,19316646 Antitrust,2001,Thriller,10965209 Glory,1989,War,26830000 Once Upon a Time in America,1984,Crime,5300000 Dead Man Down,2013,Thriller,10880926 The Merchant of Venice,2004,Drama,3752725 The Good Thief,2002,Crime,3517797 Miss Potter,2006,Biography,2975649 The Promise,2005,Fantasy,668171 DOA: Dead or Alive,2006,Adventure,480314 The Assassination of Jesse James by the Coward Robert Ford,2007,History,3904982 1911,2011,History,127437 Machine Gun Preacher,2011,Biography,537580 Pitch Perfect 2,2015,Comedy,183436380 Walk the Line,2005,Biography,119518352 Keeping the Faith,2000,Drama,37036404 The Borrowers,1997,Family,22359293 Frost/Nixon,2008,Drama,18593156 Serving Sara,2002,Comedy,16930185 The Boss,2016,Comedy,63034755 Cry Freedom,1987,Biography,5899797 Mumford,1999,Drama,4554569 Seed of Chucky,2004,Comedy,17016190 The Jacket,2005,Drama,6301131 Aladdin,1992,Animation,217350219 Straight Outta Compton,2015,Crime,161029270 Indiana Jones and the Temple of Doom,1984,Adventure,179870271 The Rugrats Movie,1998,Drama,100491683 Along Came a Spider,2001,Drama,74058698 Once Upon a 
Time in Mexico,2003,Thriller,55845943 Die Hard,1988,Action,81350242 Role Models,2008,Comedy,67266300 The Big Short,2015,Biography,70235322 Taking Woodstock,2009,Comedy,7443007 Miracle,2004,Sport,64371181 Dawn of the Dead,2004,Thriller,58885635 The Wedding Planner,2001,Romance,60400856 The Royal Tenenbaums,2001,Comedy,52353636 Identity,2003,Thriller,51475962 Last Vegas,2013,Romance,63910583 For Your Eyes Only,1981,Thriller,62300000 Serendipity,2001,Comedy,49968653 Timecop,1994,Thriller,44450000 Zoolander,2001,Comedy,45162741 Safe Haven,2013,Thriller,71346930 Hocus Pocus,1993,Family,39514713 No Reservations,2007,Romance,43097652 Kick-Ass,2010,Comedy,48043505 30 Minutes or Less,2011,Action,37053924 Dracula 2000,2000,Action,33000377 "Alexander and the Terrible, Horrible, No Good, Very Bad Day",2014,Family,66950483 Pride & Prejudice,2005,Romance,38372662 Blade Runner,1982,Thriller,27000000 Rob Roy,1995,Biography,31600000 3 Days to Kill,2014,Drama,30688364 We Own the Night,2007,Thriller,28563179 Lost Souls,2000,Drama,16779636 Just My Luck,2006,Romance,17324744 "Mystery, Alaska",1999,Comedy,8888143 The Spy Next Door,2010,Action,24268828 A Simple Wish,1997,Fantasy,8119205 Ghosts of Mars,2001,Action,8434601 Our Brand Is Crisis,2015,Comedy,6998324 Pride and Prejudice and Zombies,2016,Romance,10907291 Kundun,1997,Drama,5532301 How to Lose Friends & Alienate People,2008,Drama,2775593 Kick-Ass 2,2013,Comedy,28751715 Brick Mansions,2014,Action,20285518 Octopussy,1983,Adventure,67900000 Knocked Up,2007,Comedy,148734225 My Sister's Keeper,2009,Drama,49185998 "Welcome Home, Roscoe Jenkins",2008,Comedy,42168445 A Passage to India,1984,History,26400000 Notes on a Scandal,2006,Crime,17508670 Rendition,2007,Drama,9664316 Star Trek VI: The Undiscovered Country,1991,Action,74888996 Divine Secrets of the Ya-Ya Sisterhood,2002,Drama,69586544 The Jungle Book,2016,Drama,362645141 Kiss the Girls,1997,Drama,60491560 The Blues Brothers,1980,Crime,54200000 Joyful Noise,2012,Music,30920167 About 
a Boy,2002,Comedy,40566655 Lake Placid,1999,Action,31768374 Lucky Number Slevin,2006,Mystery,22494487 The Right Stuff,1983,Drama,21500000 Anonymous,2011,Drama,4463292 Dark City,1998,Drama,14337579 The Duchess,2008,Biography,13823741 The Newton Boys,1998,Western,10297897 Case 39,2009,Mystery,13248477 Suspect Zero,2004,Mystery,8712564 Martian Child,2007,Family,7486906 Spy Kids: All the Time in the World in 4D,2011,Comedy,38536376 Money Monster,2016,Thriller,41008532 Formula 51,2001,Thriller,5204007 Flawless,1999,Crime,4485485 Mindhunters,2004,Crime,4476235 What Just Happened,2008,Drama,1089365 The Statement,2003,Thriller,763044 Paul Blart: Mall Cop,2009,Action,20819129 Freaky Friday,2003,Romance,110222438 The 40-Year-Old Virgin,2005,Comedy,109243478 Shakespeare in Love,1998,Drama,100241322 A Walk Among the Tombstones,2014,Mystery,25977365 Kindergarten Cop,1990,Action,91457688 Pineapple Express,2008,Crime,87341380 Ever After: A Cinderella Story,1998,Comedy,65703412 Open Range,2003,Western,58328680 Flatliners,1990,Sci-Fi,61490000 A Bridge Too Far,1977,War,50800000 Red Eye,2005,Mystery,57859105 Final Destination 2,2003,Horror,46455802 "O Brother, Where Art Thou?",2000,Adventure,45506619 Legion,2010,Action,40168080 Pain & Gain,2013,Crime,49874933 In Good Company,2004,Romance,45489752 Clockstoppers,2002,Action,36985501 Silverado,1985,Action,33200000 Brothers,2009,Thriller,28501651 Agent Cody Banks 2: Destination London,2004,Family,23222861 New Year's Eve,2011,Comedy,54540525 Original Sin,2001,Romance,16252765 The Raven,2012,Thriller,16005978 Welcome to Mooseport,2004,Romance,14469428 Highlander: The Final Dimension,1994,Fantasy,13829734 Blood and Wine,1996,Drama,1075288 The Curse of the Jade Scorpion,2001,Comedy,7496522 Flipper,1996,Adventure,20047715 Self/less,2015,Mystery,12276810 The Constant Gardener,2005,Romance,33565375 The Passion of the Christ,2004,Drama,499263 Mrs. 
Doubtfire,1993,Comedy,219200000 Rain Man,1988,Drama,172825435 Gran Torino,2008,Drama,148085755 W.,2008,Biography,25517500 Taken,2008,Action,145000989 The Best of Me,2014,Romance,26761283 The Bodyguard,1992,Action,121945720 Schindler's List,1993,Biography,96067179 The Help,2011,Drama,169705587 The Fifth Estate,2013,Biography,3254172 Scooby-Doo 2: Monsters Unleashed,2004,Comedy,84185387 Freddy vs. Jason,2003,Thriller,82163317 Jimmy Neutron: Boy Genius,2001,Sci-Fi,80920948 Cloverfield,2008,Adventure,80034302 Teenage Mutant Ninja Turtles II: The Secret of the Ooze,1991,Adventure,78656813 The Untouchables,1987,Thriller,76270454 No Country for Old Men,2007,Drama,74273505 Ride Along,2014,Action,134141530 Bridget Jones's Diary,2001,Comedy,71500556 Chocolat,2000,Romance,71309760 "Legally Blonde 2: Red, White & Blonde",2003,Comedy,89808372 Parental Guidance,2012,Comedy,77264926 No Strings Attached,2011,Comedy,70625986 Tombstone,1993,Romance,56505065 Romeo Must Die,2000,Action,55973336 Final Destination 3,2006,Horror,54098051 The Lucky One,2012,Drama,60443237 Bridge to Terabithia,2007,Family,82234139 Finding Neverland,2004,Family,51676606 A Madea Christmas,2013,Comedy,52528330 The Grey,2011,Thriller,51533608 Hide and Seek,2005,Horror,51097664 Anchorman: The Legend of Ron Burgundy,2004,Comedy,84136909 Goodfellas,1990,Drama,46836394 Agent Cody Banks,2003,Adventure,47285499 Nanny McPhee,2005,Fantasy,47124400 Scarface,1983,Crime,44700000 Nothing to Lose,1997,Adventure,44455658 The Last Emperor,1987,Biography,43984230 Contraband,2012,Drama,66489425 Money Talks,1997,Comedy,41067398 There Will Be Blood,2007,Drama,40218903 The Wild Thornberrys Movie,2002,Animation,39880476 Rugrats Go Wild,2003,Musical,39399750 Undercover Brother,2002,Action,38230435 The Sisterhood of the Traveling Pants,2005,Romance,39008741 Kiss of the Dragon,2001,Crime,36833473 The House Bunny,2008,Romance,48237389 Million Dollar Arm,2014,Sport,36447959 The Giver,2014,Romance,45089048 What a Girl 
Wants,2003,Drama,35990505 Jeepers Creepers II,2003,Horror,35143332 Good Luck Chuck,2007,Romance,35000629 Cradle 2 the Grave,2003,Crime,34604054 The Hours,2002,Drama,41597830 She's the Man,2006,Romance,33687630 Mr. Bean's Holiday,2007,Family,32553210 Anacondas: The Hunt for the Blood Orchid,2004,Horror,31526393 Blood Ties,2013,Drama,41229 August Rush,2007,Drama,31655091 Elizabeth,1998,History,30012990 Bride of Chucky,1998,Horror,32368960 Tora! Tora! Tora!,1970,Action,14500000 Spice World,1997,Music,29247405 Dance Flick,2009,Music,25615792 The Shawshank Redemption,1994,Crime,28341469 Crocodile Dundee in Los Angeles,2001,Adventure,25590119 Kingpin,1996,Comedy,24944213 The Gambler,2014,Drama,33631221 August: Osage County,2013,Drama,37738400 A Lot Like Love,2005,Romance,21835784 Eddie the Eagle,2016,Drama,15785632 He Got Game,1998,Sport,21554585 Don Juan DeMarco,1994,Romance,22200000 The Losers,2010,Mystery,23527955 Don't Be Afraid of the Dark,2010,Horror,24042490 War,2007,Thriller,22466994 Punch-Drunk Love,2002,Comedy,17791031 EuroTrip,2004,Comedy,17718223 Half Past Dead,2002,Crime,15361537 Unaccompanied Minors,2006,Adventure,16647384 "Bright Lights, Big City",1988,Drama,16118077 The Adventures of Pinocchio,1996,Adventure,15091542 The Box,2009,Thriller,15045676 The Ruins,2008,Horror,17427926 The Next Best Thing,2000,Comedy,14983572 My Soul to Take,2010,Mystery,14637490 The Girl Next Door,2004,Comedy,14589444 Maximum Risk,1996,Romance,14095303 Stealing Harvard,2002,Crime,13973532 Legend,2015,Crime,1865774 Shark Night 3D,2011,Thriller,18860403 Angela's Ashes,1999,Drama,13038660 Draft Day,2014,Sport,28831145 The Conspirator,2010,Crime,11538204 Lords of Dogtown,2005,Sport,11008432 The 33,2015,Drama,12188642 Big Trouble in Little China,1986,Adventure,11100000 Warrior,2011,Sport,13651662 Michael Collins,1996,Biography,11030963 Gettysburg,1993,Drama,10769960 Stop-Loss,2008,War,10911750 Abandon,2002,Mystery,10719367 Brokedown Palace,1999,Mystery,10114315 The 
Possession,2012,Horror,49122319 Mrs. Winterbourne,1996,Romance,10070000 Straw Dogs,2011,Action,10324441 The Hoax,2006,Drama,7156933 Stone Cold,1991,Thriller,9286314 The Road,2009,Adventure,56692 Underclassman,2005,Thriller,5654777 Say It Isn't So,2001,Comedy,5516708 The World's Fastest Indian,2005,Sport,5128124 Snakes on a Plane,2006,Action,34014398 Tank Girl,1995,Action,4064333 King's Ransom,2005,Crime,4006906 Blindness,2008,Thriller,3073392 BloodRayne,2005,Action,1550000 Where the Truth Lies,2005,Mystery,871527 Without Limits,1998,Sport,777423 Me and Orson Welles,2008,Drama,1186957 The Best Offer,2013,Crime,85433 Bad Lieutenant: Port of Call New Orleans,2009,Crime,1697956 Little White Lies,2010,Comedy,183662 Love Ranch,2010,Sport,134904 The Counselor,2013,Drama,16969390 Dangerous Liaisons,1988,Drama,34700000 On the Road,2012,Adventure,717753 Star Trek IV: The Voyage Home,1986,Sci-Fi,109713132 Rocky Balboa,2006,Drama,70269171 Point Break,2015,Sport,28772222 Scream 2,1997,Horror,101334374 Jane Got a Gun,2016,Drama,1512815 Think Like a Man Too,2014,Comedy,65182182 The Whole Nine Yards,2000,Comedy,57262492 Footloose,1984,Music,80000000 Old School,2003,Comedy,74608545 The Fisher King,1991,Comedy,41895491 I Still Know What You Did Last Summer,1998,Mystery,39989008 Return to Me,2000,Romance,32662299 Zack and Miri Make a Porno,2008,Romance,31452765 Nurse Betty,2000,Comedy,25167270 The Men Who Stare at Goats,2009,War,32416109 Double Take,2001,Crime,20218 "Girl, Interrupted",1999,Biography,28871190 Win a Date with Tad Hamilton!,2004,Comedy,16964743 Muppets from Space,1999,Comedy,16290976 The Wiz,1978,Music,13000000 Ready to Rumble,2000,Sport,12372410 Play It to the Bone,1999,Drama,8427204 I Don't Know How She Does It,2011,Comedy,9639242 Piranha 3D,2010,Horror,25003072 Beyond the Sea,2004,Drama,6144806 The Princess and the Cobbler,1993,Animation,669276 The Bridge of San Luis Rey,2004,Drama,42880 Faster,2010,Crime,23225911 Howl's Moving Castle,2004,Adventure,4710455 
Zombieland,2009,Sci-Fi,75590286 King Kong,2005,Drama,218051260 The Waterboy,1998,Comedy,161487252 Star Wars: Episode V - The Empire Strikes Back,1980,Fantasy,290158751 Bad Boys,1995,Crime,65807024 The Naked Gun 2½: The Smell of Fear,1991,Comedy,86930411 Final Destination,2000,Thriller,53302314 The Ides of March,2011,Drama,40962534 Pitch Black,2000,Horror,39235088 Someone Like You...,2001,Romance,27338033 Her,2013,Drama,25556065 Eddie the Eagle,2016,Sport,15785632 Joy Ride,2001,Thriller,21973182 The Adventurer: The Curse of the Midas Box,2013,Fantasy,4756 Anywhere But Here,1999,Drama,18653615 Chasing Liberty,2004,Romance,12189514 The Crew,2000,Crime,13019253 Haywire,2011,Thriller,18934858 Jaws: The Revenge,1987,Horror,20763013 Marvin's Room,1996,Drama,12782508 The Longshots,2008,Family,11508423 The End of the Affair,1999,Drama,10660147 Harley Davidson and the Marlboro Man,1991,Western,7434726 Coco Before Chanel,2009,Biography,6109075 Chéri,2009,Drama,2708188 Vanity Fair,2004,Drama,16123851 1408,2007,Horror,71975611 Spaceballs,1987,Comedy,38119483 The Water Diviner,2014,Drama,4190530 Ghost,1990,Fantasy,217631306 There's Something About Mary,1998,Romance,176483808 The Santa Clause,1994,Fantasy,144833357 The Rookie,2002,Sport,75597042 The Game Plan,2007,Sport,90636983 The Bridges of Madison County,1995,Drama,70960517 The Animal,2001,Comedy,55762229 The Hundred-Foot Journey,2014,Comedy,54235441 The Net,1995,Mystery,50728000 I Am Sam,2001,Drama,40270895 Son of God,2014,History,59696176 Underworld,2003,Fantasy,51483949 Derailed,2005,Drama,36020063 The Informant!,2009,Drama,33313582 Shadowlands,1993,Drama,25842000 Deuce Bigalow: European Gigolo,2005,Comedy,22264487 Delivery Man,2013,Drama,30659817 Victor Frankenstein,2015,Drama,5773519 Saving Silverman,2001,Comedy,19351569 Diary of a Wimpy Kid: Dog Days,2012,Comedy,49002815 Summer of Sam,1999,Thriller,19283782 Jay and Silent Bob Strike Back,2001,Comedy,30059386 The Island,2005,Sci-Fi,35799026 The Glass 
House,2001,Thriller,17951431 "Hail, Caesar!",2016,Comedy,29997095 Josie and the Pussycats,2001,Comedy,14252830 Homefront,2013,Action,19783777 The Little Vampire,2000,Adventure,13555988 I Heart Huckabees,2004,Comedy,12784713 RoboCop 3,1993,Crime,10696210 Megiddo: The Omega Code 2,2001,Action,5974653 Darling Lili,1970,Drama,5000000 Dudley Do-Right,1999,Romance,9694105 The Transporter Refueled,2015,Thriller,16027866 Black Book,2006,War,4398392 Joyeux Noel,2005,Music,1050445 Hit and Run,2012,Action,13746550 Mad Money,2008,Thriller,20668843 Before I Go to Sleep,2014,Mystery,2963012 Stone,2010,Thriller,1796024 Molière,2007,Comedy,634277 Out of the Furnace,2013,Crime,11326836 Michael Clayton,2007,Thriller,49024969 My Fellow Americans,1996,Comedy,22294341 Arlington Road,1999,Crime,24362501 To Rome with Love,2012,Comedy,16684352 Firefox,1982,Action,46700000 South Park: Bigger Longer & Uncut,1999,Fantasy,52008288 Death at a Funeral,2007,Comedy,8579684 Teenage Mutant Ninja Turtles III,1993,Fantasy,42660000 Hardball,2001,Sport,40219708 Silver Linings Playbook,2012,Romance,132088910 Freedom Writers,2007,Crime,36581633 The Transporter,2002,Action,25296447 Never Back Down,2008,Sport,24848292 The Rage: Carrie 2,1999,Thriller,17757087 Away We Go,2009,Drama,9430988 Swing Vote,2008,Drama,16284360 Moonlight Mile,2002,Romance,6830957 Tinker Tailor Soldier Spy,2011,Drama,24104113 Molly,1999,Drama,15593 The Beaver,2011,Drama,958319 The Best Little Whorehouse in Texas,1982,Comedy,69700000 eXistenZ,1999,Horror,2840417 Raiders of the Lost Ark,1981,Action,242374454 Home Alone 2: Lost in New York,1992,Comedy,173585516 Close Encounters of the Third Kind,1977,Sci-Fi,128300000 Pulse,2006,Thriller,20259297 Beverly Hills Cop II,1987,Comedy,153665036 Bringing Down the House,2003,Comedy,132541238 The Silence of the Lambs,1991,Crime,130727000 Wayne's World,1992,Comedy,121697350 Jackass 3D,2010,Comedy,117224271 Jaws 2,1978,Thriller,102922376 Beverly Hills Chihuahua,2008,Comedy,94497271 The 
Conjuring,2013,Thriller,137387272 Are We There Yet?,2005,Family,82301521 Tammy,2014,Comedy,84518155 Disturbia,2007,Drama,80050171 School of Rock,2003,Music,81257845 Mortal Kombat,1995,Thriller,70360285 Wicker Park,2004,Drama,12831121 White Chicks,2004,Crime,69148997 The Descendants,2011,Drama,82624961 Holes,2003,Family,67325559 The Last Song,2010,Romance,62933793 12 Years a Slave,2013,Biography,56667870 Drumline,2002,Music,56398162 Why Did I Get Married Too?,2010,Romance,60072596 Edward Scissorhands,1990,Romance,56362352 Me Before You,2016,Romance,56154094 Madea's Witness Protection,2012,Crime,65623128 Date Movie,2006,Romance,48546578 Return to Never Land,2002,Adventure,48423368 Selma,2014,Drama,52066000 The Jungle Book 2,2003,Animation,47887943 Boogeyman,2005,Thriller,46363118 Premonition,2007,Drama,47852604 The Tigger Movie,2000,Drama,45542421 Max,2015,Family,42652003 Epic Movie,2007,Comedy,39737645 Conan the Barbarian,1982,Adventure,37567440 Spotlight,2015,History,44988180 Lakeview Terrace,2008,Crime,39263506 The Grudge 2,2006,Horror,39143839 How Stella Got Her Groove Back,1998,Drama,37672350 Bill & Ted's Bogus Journey,1991,Music,38037513 Man of the Year,2006,Comedy,37442180 The American,2010,Crime,35596227 Selena,1997,Music,35422828 Vampires Suck,2010,Comedy,36658108 Babel,2006,Drama,34300771 This Is Where I Leave You,2014,Comedy,34290142 Doubt,2008,Drama,33422556 Team America: World Police,2004,Comedy,32774834 Texas Chainsaw 3D,2013,Thriller,34334256 Copycat,1995,Drama,32051917 Scary Movie 5,2013,Comedy,32014289 Milk,2008,Drama,31838002 Risen,2016,Mystery,36874745 Ghost Ship,2002,Horror,30079316 A Very Harold & Kumar 3D Christmas,2011,Comedy,35033759 Wild Things,1998,Mystery,29753944 The Debt,2010,Drama,31146570 High Fidelity,2000,Drama,27277055 One Missed Call,2008,Mystery,26876529 Eye for an Eye,1996,Crime,53146000 The Bank Job,2008,Romance,30028592 Eternal Sunshine of the Spotless Mind,2004,Drama,34126138 You Again,2010,Family,25677801 Street 
Kings,2008,Drama,26415649 The World's End,2013,Comedy,26003149 Nancy Drew,2007,Comedy,25584685 Daybreakers,2009,Thriller,29975979 She's Out of My League,2010,Comedy,31584722 Monte Carlo,2011,Family,23179303 Stay Alive,2006,Thriller,23078294 Quigley Down Under,1990,Drama,21413105 Alpha and Omega,2010,Comedy,25077977 The Covenant,2006,Fantasy,23292105 Shorts,2009,Family,20916309 To Die For,1995,Drama,21200000 Vampires,1998,Action,20241395 Psycho,1960,Mystery,32000000 My Best Friend's Girl,2008,Romance,19151864 Endless Love,2014,Romance,23393765 Georgia Rule,2007,Comedy,18882880 Under the Rainbow,1981,Comedy,8500000 Simon Birch,1998,Drama,18252684 Reign Over Me,2007,Drama,19661987 Into the Wild,2007,Biography,18352454 School for Scoundrels,2006,Comedy,17803796 Silent Hill: Revelation 3D,2012,Horror,17529157 From Dusk Till Dawn,1996,Crime,25753840 Pooh's Heffalump Movie,2005,Animation,18081626 Home for the Holidays,1995,Comedy,17518220 Kung Fu Hustle,2004,Action,17104669 The Country Bears,2002,Family,16988996 The Kite Runner,2007,Drama,15797907 21 Grams,2003,Drama,16248701 Paparazzi,2004,Crime,15712072 Twilight,2008,Romance,191449475 A Guy Thing,2003,Romance,15408822 Loser,2000,Comedy,15464026 The Greatest Story Ever Told,1965,History,8000000 Disaster Movie,2008,Comedy,14174654 Armored,2009,Thriller,15988876 The Man Who Knew Too Little,1997,Thriller,13801755 What's Your Number?,2011,Romance,13987482 Lockout,2012,Thriller,14291570 Envy,2004,Comedy,12181484 Crank: High Voltage,2009,Crime,13630226 Bullets Over Broadway,1994,Crime,13383737 One Night with the King,2006,Drama,13391174 The Quiet American,2002,War,12987647 The Weather Man,2005,Drama,12469811 Undisputed,2002,Action,12398628 Ghost Town,2008,Fantasy,13214030 12 Rounds,2009,Action,12232937 Let Me In,2010,Horror,12134420 3 Ninjas Kick Back,1994,Action,11784000 Be Kind Rewind,2008,Comedy,11169531 Mrs Henderson Presents,2005,War,11034436 Triple 9,2016,Crime,12626905 Deconstructing Harry,1997,Comedy,10569071 Three to 
Tango,1999,Romance,10544143 Burnt,2015,Comedy,13650738 We're No Angels,1989,Comedy,10555348 Everyone Says I Love You,1996,Musical,9714482 Death at a Funeral,2007,Comedy,8579684 Death Sentence,2007,Crime,9525276 Everybody's Fine,2009,Adventure,8855646 Superbabies: Baby Geniuses 2,2004,Family,9109322 The Man,2005,Action,8326035 Code Name: The Cleaner,2007,Crime,8104069 Connie and Carla,2004,Comedy,8054280 Inherent Vice,2014,Romance,8093318 Doogal,2006,Adventure,7382993 Battle of the Year,2013,Music,8888355 An American Carol,2008,Comedy,7001720 Machete Kills,2013,Action,7268659 Willard,2003,Horror,6852144 Strange Wilderness,2008,Adventure,6563357 Topsy-Turvy,1999,Drama,6201757 A Dangerous Method,2011,Thriller,5702083 A Scanner Darkly,2006,Mystery,5480996 Chasing Mavericks,2012,Sport,6002756 Alone in the Dark,2005,Sci-Fi,5132655 Bandslam,2009,Family,5205343 Birth,2004,Thriller,5005883 A Most Violent Year,2014,Crime,5749134 Flash of Genius,2008,Drama,4234040 I'm Not There.,2007,Drama,4001121 The Cold Light of Day,2012,Thriller,3749061 The Brothers Bloom,2008,Drama,3519627 "Synecdoche, New York",2008,Drama,3081925 Princess Mononoke,1997,Adventure,2298191 Bon voyage,2003,Mystery,2353728 Can't Stop the Music,1980,Musical,2000000 The Proposition,2005,Western,1900725 Courage,2015,Biography,2246000 Marci X,2003,Comedy,1646664 Equilibrium,2002,Thriller,1190018 The Children of Huang Shi,2008,War,1027749 The Yards,2000,Crime,882710 By the Sea,2015,Drama,531009 Steamboy,2004,Family,410388 The Game of Their Lives,2005,Drama,375474 Rapa Nui,1994,History,305070 Dylan Dog: Dead of Night,2010,Crime,1183354 People I Know,2002,Drama,121972 The Tempest,2010,Fantasy,263365 The Painted Veil,2006,Romance,8047690 The Baader Meinhof Complex,2008,Drama,476270 Dances with Wolves,1990,Adventure,184208848 Bad Teacher,2011,Comedy,100292856 Sea of Love,1989,Crime,58571513 A Cinderella Story,2004,Family,51431160 Scream,1996,Mystery,103001286 Thir13en Ghosts,2001,Horror,41867960 Back to the 
Future,1985,Sci-Fi,210609762 House on Haunted Hill,1999,Horror,40846082 I Can Do Bad All by Myself,2009,Comedy,51697449 The Switch,2010,Romance,27758465 Just Married,2003,Romance,56127162 The Devil's Double,2011,Biography,1357042 Thomas and the Magic Railroad,2000,Comedy,15911333 The Crazies,2010,Thriller,39103378 Spirited Away,2001,Family,10049886 The Bounty,1984,Adventure,8600000 The Book Thief,2013,Drama,21483154 Sex Drive,2008,Adventure,8396942 Leap Year,2010,Comedy,12561 Take Me Home Tonight,2011,Romance,6923891 The Nutcracker,1993,Fantasy,2119994 Kansas City,1996,Drama,1292527 The Amityville Horror,2005,Thriller,64255243 Adaptation.,2002,Drama,22245861 Land of the Dead,2005,Horror,20433940 Fear and Loathing in Las Vegas,1998,Comedy,10562387 The Invention of Lying,2009,Comedy,18439082 Neighbors,2014,Comedy,150056505 The Mask,1994,Action,119938730 Big,1988,Fantasy,114968774 Borat: Cultural Learnings of America for Make Benefit Glorious Nation of Kazakhstan,2006,Comedy,128505958 Legally Blonde,2001,Romance,95001351 Star Trek III: The Search for Spock,1984,Action,76400000 The Exorcism of Emily Rose,2005,Drama,75072454 Deuce Bigalow: Male Gigolo,1999,Romance,65535067 Left Behind,2014,Thriller,13998282 The Family Stone,2005,Comedy,6061759 Barbershop 2: Back in Business,2004,Drama,64955956 Bad Santa,2003,Drama,60057639 Austin Powers: International Man of Mystery,1997,Comedy,53868030 My Big Fat Greek Wedding 2,2016,Family,59573085 Diary of a Wimpy Kid: Rodrick Rules,2011,Comedy,52691009 Predator,1987,Sci-Fi,59735548 Amadeus,1984,History,51600000 Prom Night,2008,Horror,43818159 Mean Girls,2004,Comedy,86049418 Under the Tuscan Sun,2003,Romance,43601508 Gosford Park,2001,Mystery,41300105 Peggy Sue Got Married,1986,Comedy,41382841 Birdman or (The Unexpected Virtue of Ignorance),2014,Comedy,42335698 Blue Jasmine,2013,Drama,33404871 United 93,2006,History,31471430 Honey,2003,Drama,30222640 Glory,1989,History,26830000 Spy Hard,1996,Action,26906039 The 
Fog,1980,Fantasy,21378000 Soul Surfer,2011,Sport,43853424 Observe and Report,2009,Crime,23993605 Conan the Destroyer,1984,Fantasy,26400000 Raging Bull,1980,Drama,45250 Love Happens,2009,Drama,22927390 Young Sherlock Holmes,1985,Thriller,4250320 Fame,2009,Musical,22452209 127 Hours,2010,Thriller,18329466 Small Time Crooks,2000,Comedy,17071230 Center Stage,2000,Drama,17174870 Love the Coopers,2015,Comedy,26284475 Catch That Kid,2004,Comedy,16702864 Life as a House,2001,Drama,15561627 Steve Jobs,2015,Biography,17750583 "I Love You, Beth Cooper",2009,Comedy,14793904 Youth in Revolt,2009,Romance,15281286 The Legend of the Lone Ranger,1981,Western,8000000 The Tailor of Panama,2001,Thriller,13491653 Getaway,2013,Crime,10494494 The Ice Storm,1997,Drama,7837632 And So It Goes,2014,Drama,15155772 Troop Beverly Hills,1989,Comedy,8508843 Being Julia,2004,Drama,7739049 9½ Weeks,1986,Romance,6734844 Dragonslayer,1981,Adventure,6000000 The Last Station,2009,Drama,6615578 Ed Wood,1994,Biography,5887457 Labor Day,2013,Drama,13362308 Mongol: The Rise of Genghis Khan,2007,Biography,5701643 RocknRolla,2008,Crime,5694401 Megaforce,1982,Action,5333658 Hamlet,1996,Drama,4414535 Midnight Special,2016,Thriller,3707794 Anything Else,2003,Romance,3203044 The Railway Man,2013,Biography,4435083 The White Ribbon,2009,Drama,2222647 The Wraith,1986,Romance,3500000 The Salton Sea,2002,Drama,676698 One Man's Hero,1999,Western,229311 Renaissance,2006,Thriller,63260 Superbad,2007,Comedy,121463226 Step Up 2: The Streets,2008,Romance,58006147 Hoodwinked!,2005,Comedy,51053787 Hotel Rwanda,2004,Drama,23472900 Hitman,2007,Action,39687528 Black Nativity,2013,Family,7017178 City of Ghosts,2002,Crime,325491 The Others,2001,Horror,96471845 Aliens,1986,Action,85200000 My Fair Lady,1964,Romance,72000000 I Know What You Did Last Summer,1997,Mystery,72219395 Let's Be Cops,2014,Comedy,82389560 Sideways,2004,Adventure,71502303 Beerfest,2006,Comedy,19179969 Halloween,1978,Thriller,47000000 Hero,2002,Action,84961 
Good Boy!,2003,Drama,37566230 The Best Man Holiday,2013,Comedy,70492685 Smokin' Aces,2006,Action,35635046 Saw 3D: The Final Chapter,2010,Mystery,45670855 40 Days and 40 Nights,2002,Romance,37939782 TRON: Legacy,2010,Action,172051787 A Night at the Roxbury,1998,Romance,30324946 Beastly,2011,Fantasy,27854896 The Hills Have Eyes,2006,Horror,41777564 Dickie Roberts: Former Child Star,2003,Comedy,22734486 "McFarland, USA",2015,Biography,44469602 Pitch Perfect,2012,Comedy,64998368 Summer Catch,2001,Comedy,19693891 A Simple Plan,1998,Drama,16311763 They,2002,Horror,12693621 Larry the Cable Guy: Health Inspector,2006,Comedy,15655665 The Adventures of Elmo in Grouchland,1999,Comedy,11634458 Brooklyn's Finest,2009,Drama,27154426 Evil Dead,2013,Horror,54239856 My Life in Ruins,2009,Romance,8662318 American Dreamz,2006,Music,7156725 Superman IV: The Quest for Peace,1987,Sci-Fi,15681020 Running Scared,2006,Drama,6855137 Shanghai Surprise,1986,Romance,2315683 The Illusionist,2006,Mystery,39825798 Roar,1981,Thriller,2000000 Veronica Guerin,2003,Crime,1569918 Southland Tales,2006,Thriller,273420 The Apparition,2012,Horror,4930798 My Girl,1991,Romance,59847242 Fur: An Imaginary Portrait of Diane Arbus,2006,Drama,220914 The Illusionist,2006,Drama,39825798 Wall Street,1987,Crime,43848100 Sense and Sensibility,1995,Drama,42700000 Becoming Jane,2007,Drama,18663911 Sydney White,2007,Comedy,11702090 House of Sand and Fog,2003,Drama,13005485 Dead Poets Society,1989,Drama,95860116 Dumb & Dumber,1994,Comedy,127175354 When Harry Met Sally...,1989,Romance,92823600 The Verdict,1982,Drama,54000000 Road Trip,2000,Comedy,68525609 Varsity Blues,1999,Sport,52885587 The Artist,2011,Comedy,44667095 The Unborn,2009,Fantasy,42638165 Moonrise Kingdom,2012,Comedy,45507053 The Texas Chainsaw Massacre: The Beginning,2006,Horror,39511038 The Young Messiah,2016,Drama,6462576 The Master of Disguise,2002,Family,40363530 Pan's Labyrinth,2006,War,37623143 See Spot Run,2001,Action,33357476 Baby 
Boy,2001,Crime,28734552 The Roommate,2011,Horror,37300107 Joe Dirt,2001,Comedy,27087695 Double Impact,1991,Crime,30102717 Hot Fuzz,2007,Action,23618786 The Women,2008,Drama,26896744 Vicky Cristina Barcelona,2008,Drama,23213577 Boys and Girls,2000,Drama,20627372 White Oleander,2002,Drama,16346122 Jennifer's Body,2009,Comedy,16204793 Drowning Mona,2000,Mystery,15427192 Radio Days,1987,Comedy,14792779 Left Behind,2014,Fantasy,13998282 Remember Me,2010,Romance,19057024 How to Deal,2003,Drama,14108518 My Stepmother Is an Alien,1988,Sci-Fi,13854000 Philadelphia,1993,Drama,77324422 The Thirteenth Floor,1999,Thriller,15500000 Duets,2000,Music,4734235 Hollywood Ending,2002,Romance,4839383 Detroit Rock City,1999,Comedy,4193025 Highlander,1986,Action,5900000 Things We Lost in the Fire,2007,Drama,2849142 Steel,1997,Crime,1686429 The Immigrant,2013,Drama,1984743 The White Countess,2005,History,1666262 Trance,2013,Thriller,2319187 Soul Plane,2004,Comedy,13922211 Good,2008,Romance,23091 Enter the Void,2009,Fantasy,336467 Vamps,2012,Romance,2964 The Homesman,2014,Drama,2428883 Juwanna Mann,2002,Drama,13571817 Slow Burn,2005,Thriller,1181197 Wasabi,2001,Drama,81525 Slither,2006,Comedy,7774730 Beverly Hills Cop,1984,Action,234760500 Home Alone,1990,Family,285761243 3 Men and a Baby,1987,Comedy,167780960 Tootsie,1982,Comedy,177200000 Top Gun,1986,Romance,176781728 "Crouching Tiger, Hidden Dragon",2000,Action,128067808 American Beauty,1999,Drama,130058047 The King's Speech,2010,History,138795342 Twins,1988,Crime,111936400 The Yellow Handkerchief,2008,Romance,317040 The Color Purple,1985,Drama,94175854 The Imitation Game,2014,War,91121452 Private Benjamin,1980,War,69800000 Diary of a Wimpy Kid,2010,Family,64001297 Mama,2013,Horror,71588220 Halloween,1978,Thriller,47000000 National Lampoon's Vacation,1983,Comedy,61400000 Bad Grandpa,2013,Comedy,101978840 The Queen,2006,Biography,56437947 Beetlejuice,1988,Fantasy,73326666 Why Did I Get Married?,2007,Comedy,55184721 Little 
Women,1994,Family,50003300 The Woman in Black,2012,Horror,54322273 When a Stranger Calls,2006,Thriller,47860214 Big Fat Liar,2002,Adventure,47811275 Wag the Dog,1997,Drama,43022524 The Lizzie McGuire Movie,2003,Romance,42672630 Snitch,2013,Action,42919096 Krampus,2015,Fantasy,42592530 The Faculty,1998,Sci-Fi,40064955 Cop Land,1997,Thriller,44886089 Not Another Teen Movie,2001,Comedy,37882551 End of Watch,2012,Drama,40983001 Aloha,2015,Romance,20991497 The Skulls,2000,Action,35007180 The Theory of Everything,2014,Romance,35887263 Malibu's Most Wanted,2003,Crime,34308901 Where the Heart Is,2000,Drama,33771174 Lawrence of Arabia,1962,History,6000000 Halloween II,2009,Horror,33386128 Wild,2014,Biography,37877959 The Last House on the Left,2009,Crime,32721635 The Wedding Date,2005,Romance,31585300 Halloween: Resurrection,2002,Comedy,30259652 Clash of the Titans,2010,Adventure,163192114 The Princess Bride,1987,Adventure,30857814 The Great Debaters,2007,Drama,30226144 Drive,2011,Crime,35054909 Confessions of a Teenage Drama Queen,2004,Comedy,29302097 The Object of My Affection,1998,Drama,29106737 28 Weeks Later,2007,Horror,28637507 When the Game Stands Tall,2014,Family,30127963 Because of Winn-Dixie,2005,Comedy,32645546 Love & Basketball,2000,Drama,27441122 Grosse Pointe Blank,1997,Crime,28014536 All About Steve,2009,Comedy,33860010 Book of Shadows: Blair Witch 2,2000,Mystery,26421314 The Craft,1996,Horror,24881000 Match Point,2005,Thriller,23089926 Ramona and Beezus,2010,Family,26161406 The Remains of the Day,1993,Drama,22954968 Boogie Nights,1997,Drama,26384919 Nowhere to Run,1993,Drama,22189039 Flicka,2006,Family,20998709 The Hills Have Eyes II,2007,Horror,20801344 Urban Legends: Final Cut,2000,Thriller,21468807 Tuck Everlasting,2002,Fantasy,19158074 The Marine,2006,Thriller,18843314 Keanu,2016,Comedy,20566327 Country Strong,2010,Music,20218921 Disturbing Behavior,1998,Sci-Fi,17411331 The Place Beyond the Pines,2012,Crime,21383298 The November 
Man,2014,Thriller,24984868 Eye of the Beholder,1999,Mystery,16459004 The Hurt Locker,2008,Drama,15700000 Firestarter,1984,Sci-Fi,15100000 Killing Them Softly,2012,Crime,14938570 A Most Wanted Man,2014,Thriller,17237244 Freddy Got Fingered,2001,Comedy,14249005 The Pirates Who Don't Do Anything: A VeggieTales Movie,2008,Animation,12701880 Highlander: Endgame,2000,Sci-Fi,12801190 Idlewild,2006,Romance,12549485 One Day,2011,Drama,13766014 Whip It,2009,Sport,13034417 Confidence,2003,Crime,12212417 The Muse,1999,Comedy,11614236 De-Lovely,2004,Drama,13337299 New York Stories,1989,Drama,10763469 Barney's Great Adventure,1998,Family,11144518 The Man with the Iron Fists,2012,Action,15608545 Home Fries,1998,Drama,10443316 Here on Earth,2000,Romance,10494147 Brazil,1985,Drama,9929000 Raise Your Voice,2004,Music,10411980 The Big Lebowski,1998,Comedy,17439163 Black Snake Moan,2006,Music,9396487 Dark Blue,2002,Crime,9059588 A Mighty Heart,2007,Thriller,9172810 Whatever It Takes,2000,Drama,8735529 Boat Trip,2002,Comedy,8586376 The Importance of Being Earnest,2002,Comedy,8378141 Hoot,2006,Family,8080116 In Bruges,2008,Crime,7757130 Peeples,2013,Romance,9123834 The Rocker,2008,Music,6409206 Post Grad,2009,Comedy,6373693 Promised Land,2012,Drama,7556708 Whatever Works,2009,Comedy,5306447 The In Crowd,2000,Thriller,5217498 Three Burials,2005,Crime,5023275 Jakob the Liar,1999,Drama,4956401 Kiss Kiss Bang Bang,2005,Comedy,4235837 Idle Hands,1999,Comedy,4002955 Mulholland Drive,2001,Drama,7219578 You Will Meet a Tall Dark Stranger,2010,Comedy,3247816 Never Let Me Go,2010,Sci-Fi,2412045 Transsiberian,2008,Drama,2203641 The Clan of the Cave Bear,1986,Drama,1953732 Crazy in Alabama,1999,Comedy,1954202 Funny Games,2007,Crime,1294640 Metropolis,1927,Drama,26435 District B13,2004,Crime,1197786 Things to Do in Denver When You're Dead,1995,Drama,529766 The Assassin,2015,Drama,613556 Buffalo Soldiers,2001,Crime,353743 Ong-bak 2,2008,Action,102055 The Midnight Meat Train,2008,Fantasy,73548 The Son 
of No One,2011,Drama,28870 All the Queen's Men,2001,Action,22723 The Good Night,2007,Drama,20380 Groundhog Day,1993,Fantasy,70906973 Magic Mike XXL,2015,Music,66009973 Romeo + Juliet,1996,Drama,46338728 Sarah's Key,2010,Drama,7691700 Unforgiven,1992,Western,101157447 Manderlay,2005,Drama,74205 Slumdog Millionaire,2008,Drama,141319195 Fatal Attraction,1987,Romance,156645693 Pretty Woman,1990,Romance,178406268 Crocodile Dundee II,1988,Action,109306210 Born on the Fourth of July,1989,Biography,70001698 Cool Runnings,1993,Adventure,68856263 My Bloody Valentine,2009,Horror,51527787 The Possession,2012,Thriller,49122319 Stomp the Yard,2007,Drama,61356221 The Spy Who Loved Me,1977,Sci-Fi,46800000 Urban Legend,1998,Thriller,38048637 Dangerous Liaisons,1988,Romance,34700000 White Fang,1991,Drama,34793160 Superstar,1999,Romance,30628981 The Iron Lady,2011,Drama,29959436 Jonah: A VeggieTales Movie,2002,Animation,25571351 Poetic Justice,1993,Drama,27515786 All About the Benjamins,2002,Crime,25482931 Vampire in Brooklyn,1995,Horror,19900000 An American Haunting,2005,Horror,16298046 My Boss's Daughter,2003,Comedy,15549702 A Perfect Getaway,2009,Adventure,15483540 Our Family Wedding,2010,Comedy,20246959 Dead Man on Campus,1998,Comedy,15062898 Tea with Mussolini,1999,Comedy,14348123 Thinner,1996,Fantasy,15171475 Crooklyn,1994,Drama,13640000 Jason X,2001,Thriller,12610731 Big Fat Liar,2002,Comedy,47811275 Bobby,2006,History,11204499 Head Over Heels,2001,Romance,10397365 Fun Size,2012,Adventure,9402410 Little Children,2006,Drama,5459824 Gossip,2000,Thriller,5108820 A Walk on the Moon,1999,Drama,4741987 Catch a Fire,2006,Biography,4291965 Soul Survivors,2001,Drama,3100650 Jefferson in Paris,1995,History,2474000 Caravans,1978,Adventure,1000000 Mr. 
Turner,2014,Drama,3958500 Amen.,2002,Biography,274299 The Lucky Ones,2008,Drama,183088 Margaret,2011,Drama,46495 Flipped,2010,Drama,1752214 Brokeback Mountain,2005,Romance,83025853 Teenage Mutant Ninja Turtles,2014,Action,190871240 Clueless,1995,Romance,56631572 Far from Heaven,2002,Drama,15854988 Hot Tub Time Machine 2,2015,Comedy,12282677 Quills,2000,Drama,7060876 Seven Psychopaths,2012,Comedy,14989761 Downfall,2004,Drama,5501940 The Sea Inside,2004,Drama,2086345 "Good Morning, Vietnam",1987,Biography,123922370 The Last Godfather,2010,Comedy,163591 Justin Bieber: Never Say Never,2011,Music,73000942 Black Swan,2010,Drama,106952327 RoboCop,2014,Action,58607007 The Godfather: Part II,1974,Drama,57300000 Save the Last Dance,2001,Drama,91038276 A Nightmare on Elm Street 4: The Dream Master,1988,Horror,49369900 Miracles from Heaven,2016,Drama,61693523 "Dude, Where's My Car?",2000,Comedy,46729374 Young Guns,1988,Western,44726644 St. Vincent,2014,Comedy,44134898 About Last Night,2014,Comedy,48637684 10 Things I Hate About You,1999,Drama,38176108 The New Guy,2002,Comedy,28972187 Loaded Weapon 1,1993,Crime,27979400 The Shallows,2016,Thriller,54257433 The Butterfly Effect,2004,Thriller,23947 Snow Day,2000,Comedy,60008303 This Christmas,2007,Romance,49121934 Baby Geniuses,1999,Crime,27141959 The Big Hit,1998,Comedy,27052167 Harriet the Spy,1996,Drama,26539321 Child's Play 2,1990,Horror,28501605 No Good Deed,2014,Crime,52543632 The Mist,2007,Horror,25592632 Ex Machina,2015,Drama,25440971 Being John Malkovich,1999,Comedy,22858926 Two Can Play That Game,2001,Comedy,22235901 Earth to Echo,2014,Family,38916903 Crazy/Beautiful,2001,Romance,16929123 Letters from Iwo Jima,2006,History,13753931 The Astronaut Farmer,2006,Drama,10996440 Room,2015,Drama,14677654 Dirty Work,1998,Comedy,9975684 Serial Mom,1994,Thriller,7881335 Dick,1999,Comedy,6241697 Light It Up,1999,Thriller,5871603 54,1998,Music,16574731 Bubble Boy,2001,Comedy,5002310 Birthday Girl,2001,Crime,4919896 21 & 
Over,2013,Comedy,25675765 "Paris, je t'aime",2006,Romance,4857376 Resurrecting the Champ,2007,Drama,3169424 Admission,2013,Romance,18004225 The Widow of Saint-Pierre,2000,Drama,3058380 Chloe,2009,Mystery,3074838 Faithful,1996,Drama,2104000 Brothers,2009,Drama,28501651 Find Me Guilty,2006,Crime,1172769 The Perks of Being a Wallflower,2012,Drama,17738570 Excessive Force,1993,Action,1200000 Infamous,2006,Crime,1150403 The Claim,2000,Drama,403932 The Vatican Tapes,2015,Thriller,1712111 Attack the Block,2011,Thriller,1024175 In the Land of Blood and Honey,2011,Drama,301305 The Call,2013,Thriller,51872378 The Crocodile Hunter: Collision Course,2002,Comedy,28399192 I Love You Phillip Morris,2009,Romance,2035566 Antwone Fisher,2002,Biography,21078145 The Emperor's Club,2002,Drama,14060950 True Romance,1993,Thriller,12281500 Glengarry Glen Ross,1992,Crime,10725228 The Killer Inside Me,2010,Drama,214966 Sorority Row,2009,Horror,11956207 Lars and the Real Girl,2007,Romance,5949693 The Boy in the Striped Pajamas,2008,Drama,9030581 Dancer in the Dark,2000,Musical,4157491 Oscar and Lucinda,1997,Romance,1508689 The Funeral,1996,Crime,1227324 Solitary Man,2009,Romance,4360548 Machete,2010,Thriller,26589953 Casino Jack,2010,Comedy,1039869 The Land Before Time,1988,Adventure,48092846 Tae Guk Gi: The Brotherhood of War,2004,Action,1110186 The Perfect Game,2009,Drama,1089445 The Exorcist,1973,Horror,204565000 Jaws,1975,Adventure,260000000 American Pie,1999,Comedy,101736215 Ernest & Celestine,2012,Crime,71442 The Golden Child,1986,Action,79817937 Think Like a Man,2012,Comedy,91547205 Barbershop,2002,Drama,75074950 Star Trek II: The Wrath of Khan,1982,Action,78900000 Ace Ventura: Pet Detective,1994,Comedy,72217000 WarGames,1983,Sci-Fi,79568000 Witness,1985,Romance,65500000 Act of Valor,2012,War,70011073 Step Up,2006,Crime,65269010 Beavis and Butt-Head Do America,1996,Crime,63071133 Jackie Brown,1997,Thriller,39647595 Harold & Kumar Escape from Guantanamo Bay,2008,Comedy,38087366 
Chronicle,2012,Sci-Fi,64572496 Yentl,1983,Drama,30400000 Time Bandits,1981,Sci-Fi,42365600 Crossroads,2002,Drama,37188667 Project X,2012,Comedy,54724272 One Hour Photo,2002,Drama,31597131 Quarantine,2008,Sci-Fi,31691811 The Eye,2008,Mystery,31397498 Johnson Family Vacation,2004,Comedy,31179516 How High,2001,Fantasy,31155435 The Muppet Christmas Carol,1992,Fantasy,27281507 Casino Royale,2006,Thriller,167007184 Frida,2002,Romance,25776062 Katy Perry: Part of Me,2012,Music,25240988 The Fault in Our Stars,2014,Romance,124868837 Rounders,1998,Crime,22905674 Top Five,2014,Romance,25277561 Stir of Echoes,1999,Mystery,21133087 Philomena,2013,Drama,37707719 The Upside of Anger,2005,Comedy,18761993 Aquamarine,2006,Romance,18595716 Paper Towns,2015,Drama,31990064 Nebraska,2013,Drama,17613460 Tales from the Crypt: Demon Knight,1995,Thriller,21088568 Max Keeble's Big Move,2001,Comedy,17292381 Young Adult,2011,Comedy,16300302 Crank,2006,Thriller,27829874 Living Out Loud,1998,Drama,12902790 Das Boot,1981,Adventure,11433134 The Alamo,2004,War,22406362 Sorority Boys,2002,Comedy,10198766 About Time,2013,Romance,15294553 House of Flying Daggers,2004,Adventure,11041228 Arbitrage,2012,Drama,7918283 Project Almanac,2015,Sci-Fi,22331028 Cadillac Records,2008,Music,8134217 Screwed,2000,Comedy,6982680 Fortress,1992,Crime,6739141 For Your Consideration,2006,Comedy,5542025 Celebrity,1998,Drama,5032496 Running with Scissors,2006,Comedy,6754898 From Justin to Kelly,2003,Musical,4922166 Girl 6,1996,Comedy,4903000 In the Cut,2003,Mystery,4717455 Two Lovers,2008,Drama,3148482 Last Orders,2001,Drama,2326407 The Host,2006,Horror,2201412 Ravenous,1999,Fantasy,2060953 Charlie Bartlett,2007,Drama,3950294 The Great Beauty,2013,Drama,2835886 The Dangerous Lives of Altar Boys,2002,Drama,1779284 Stoker,2013,Drama,1702277 2046,2004,Sci-Fi,261481 Married Life,2007,Romance,1506998 Duma,2005,Family,860002 Ondine,2009,Drama,548934 Brother,2000,Drama,447750 Welcome to Collinwood,2002,Comedy,333976 Critical 
Care,1997,Comedy,141853 The Life Before Her Eyes,2007,Drama,303439 Trade,2007,Thriller,214202 Fateless,2005,Romance,195888 Breakfast of Champions,1999,Comedy,175370 City of Life and Death,2009,War,119922 Home,2015,Adventure,177343675 5 Days of War,2011,Action,17149 Snatch,2000,Comedy,30093107 Pet Sematary,1989,Fantasy,57469179 Gremlins,1984,Horror,148170000 Star Wars: Episode IV - A New Hope,1977,Sci-Fi,460935665 Dirty Grandpa,2016,Comedy,35537564 Doctor Zhivago,1965,Drama,111722000 High School Musical 3: Senior Year,2008,Comedy,90556401 The Fighter,2010,Drama,93571803 My Cousin Vinny,1992,Comedy,52929168 If I Stay,2014,Drama,50461335 Major League,1989,Sport,49797148 Phone Booth,2002,Crime,46563158 A Walk to Remember,2002,Drama,41227069 Dead Man Walking,1995,Crime,39025000 Cruel Intentions,1999,Romance,38201895 Saw VI,2009,Mystery,27669413 The Secret Life of Bees,2008,Drama,37766350 Corky Romano,2001,Comedy,23978402 Raising Cain,1992,Drama,21370057 Invaders from Mars,1986,Horror,4884663 Brooklyn,2015,Romance,38317535 Out Cold,2001,Comedy,13903262 The Ladies Man,2000,Comedy,13592872 Quartet,2012,Drama,18381787 Tomcats,2001,Comedy,13558739 Frailty,2001,Thriller,13103828 Woman in Gold,2015,Drama,33305037 Kinsey,2004,Drama,10214647 Army of Darkness,1992,Horror,11501093 Slackers,2002,Comedy,4814244 What's Eating Gilbert Grape,1993,Drama,9170214 The Visual Bible: The Gospel of John,2003,History,4068087 Vera Drake,2004,Drama,3753806 The Guru,2002,Romance,3034181 The Perez Family,1995,Comedy,2832826 Inside Llewyn Davis,2013,Drama,13214255 O,2001,Drama,16017403 Return to the Blue Lagoon,1991,Adventure,2807854 Copying Beethoven,2006,Music,352786 Poltergeist,1982,Horror,76600000 Saw V,2008,Mystery,56729973 Jindabyne,2006,Thriller,399879 Kabhi Alvida Naa Kehna,2006,Drama,3275443 An Ideal Husband,1999,Romance,18535191 The Last Days on Mars,2013,Thriller,23838 Darkness,2002,Horror,22160085 2001: A Space Odyssey,1968,Sci-Fi,56715371 E.T. 
the Extra-Terrestrial,1982,Family,434949459 In the Land of Women,2007,Drama,11043445 For Greater Glory: The True Story of Cristiada,2012,History,5669081 Good Will Hunting,1997,Drama,138339411 Saw III,2006,Horror,80150343 Stripes,1981,Action,85300000 Bring It On,2000,Sport,68353550 The Purge: Election Year,2016,Horror,78845130 She's All That,1999,Romance,63319509 Precious,2009,Drama,47536959 Saw IV,2007,Mystery,63270259 White Noise,2005,Drama,55865715 Madea's Family Reunion,2006,Drama,63231524 The Color of Money,1986,Drama,52293982 The Mighty Ducks,1992,Sport,50752337 The Grudge,2004,Mystery,110175871 Happy Gilmore,1996,Comedy,38624000 Jeepers Creepers,2001,Horror,37470017 Bill & Ted's Excellent Adventure,1989,Comedy,40485039 Oliver!,1968,Musical,16800000 The Best Exotic Marigold Hotel,2011,Drama,46377022 Recess: School's Out,2001,Family,36696761 Mad Max Beyond Thunderdome,1985,Sci-Fi,36200000 The Boy,2016,Thriller,35794166 Devil,2010,Thriller,33583175 Friday After Next,2002,Comedy,32983713 Insidious: Chapter 3,2015,Fantasy,52200504 The Last Dragon,1985,Comedy,33000000 Snatch,2000,Crime,30093107 The Lawnmower Man,1992,Sci-Fi,32101000 Nick and Norah's Infinite Playlist,2008,Music,31487293 Dogma,1999,Adventure,30651422 The Banger Sisters,2002,Comedy,30306281 Twilight Zone: The Movie,1983,Horror,29500000 Road House,1989,Action,30050028 A Low Down Dirty Shame,1994,Comedy,29392418 Swimfan,2002,Thriller,28563926 Employee of the Month,2006,Comedy,28435406 Can't Hardly Wait,1998,Comedy,25339117 The Outsiders,1983,Crime,25600000 Sinister 2,2015,Thriller,27736779 Sparkle,2012,Music,24397469 Valentine,2001,Horror,20384136 The Fourth Kind,2009,Sci-Fi,25464480 A Prairie Home Companion,2006,Music,20338609 Sugar Hill,1993,Thriller,18272447 Rushmore,1998,Comedy,17096053 Skyline,2010,Sci-Fi,21371425 The Second Best Exotic Marigold Hotel,2015,Comedy,33071558 Kit Kittredge: An American Girl,2008,Family,17655201 The Perfect Man,2005,Romance,16247775 Mo' Better Blues,1990,Drama,16153600 
Kung Pow: Enter the Fist,2002,Action,16033556 Tremors,1990,Horror,16667084 Wrong Turn,2003,Thriller,15417771 The Corruptor,1999,Crime,15156200 Mud,2012,Drama,21589307 Reno 911!: Miami,2007,Comedy,20339754 One Direction: This Is Us,2013,Documentary,28873374 Hey Arnold! The Movie,2002,Family,13684949 My Week with Marilyn,2011,Drama,14597405 The Matador,2005,Thriller,12570442 Love Jones,1997,Drama,12514138 The Gift,2015,Mystery,43771291 End of the Spear,2005,Adventure,11703287 Get Over It,2001,Comedy,11560259 Office Space,1999,Comedy,10824921 Drop Dead Gorgeous,1999,Thriller,10561238 Big Eyes,2014,Biography,14479776 Very Bad Things,1998,Comedy,9801782 Sleepover,2004,Romance,8070311 MacGruber,2010,Action,8460995 Dirty Pretty Things,2002,Thriller,8111360 Movie 43,2013,Comedy,8828771 The Tourist,2010,Romance,67631157 Over Her Dead Body,2008,Romance,7563670 Seeking a Friend for the End of the World,2012,Adventure,6619173 American History X,1998,Drama,6712241 The Collection,2012,Thriller,6842058 Teacher's Pet,2004,Comedy,6491350 The Red Violin,1998,Romance,9473382 The Straight Story,1999,Drama,6197866 Deuces Wild,2002,Drama,6044618 Bad Words,2013,Comedy,7764027 Black or White,2014,Drama,21569041 On the Line,2001,Romance,4356743 Rescue Dawn,2006,Drama,5484375 "Jeff, Who Lives at Home",2011,Comedy,4244155 I Am Love,2009,Romance,5004648 Atlas Shrugged II: The Strike,2012,Drama,3333823 Romeo Is Bleeding,1993,Crime,3275585 The Limey,1999,Thriller,3193102 Crash,2004,Thriller,54557348 The House of Mirth,2000,Romance,3041803 Malone,1987,Thriller,3060858 Peaceful Warrior,2006,Drama,1055654 Bucky Larson: Born to Be a Star,2011,Comedy,2331318 Bamboozled,2000,Music,2185266 The Forest,2016,Thriller,26583369 Sphinx,1981,Adventure,800000 While We're Young,2014,Drama,7574066 A Better Life,2011,Drama,1754319 Spider,2002,Drama,1641788 Gun Shy,2000,Comedy,1631839 Nicholas Nickleby,2002,Drama,1309849 The Iceman,2012,Drama,1939441 Cecil B. 
DeMented,2000,Thriller,1276984 Killer Joe,2011,Romance,1987762 The Joneses,2009,Drama,1474508 Owning Mahowny,2003,Drama,1011054 The Brothers Solomon,2007,Comedy,900926 My Blueberry Nights,2007,Drama,866778 Swept Away,2002,Romance,598645 "War, Inc.",2008,Action,578527 Shaolin Soccer,2001,Action,488872 The Brown Bunny,2003,Drama,365734 Rosewater,2014,Biography,3093491 Imaginary Heroes,2004,Drama,228524 High Heels and Low Lifes,2001,Comedy,226792 Severance,2006,Thriller,136432 Edmond,2005,Drama,131617 Police Academy: Mission to Moscow,1994,Crime,126247 An Alan Smithee Film: Burn Hollywood Burn,1997,Comedy,15447 The Open Road,2009,Comedy,19348 The Good Guy,2009,Romance,100503 Motherhood,2009,Drama,92900 Blonde Ambition,2007,Comedy,5561 The Oxford Murders,2008,Thriller,3607 Eulogy,2004,Comedy,70527 "The Good, the Bad, the Weird",2008,Action,128486 The Lost City,2005,Drama,2483955 Next Friday,2000,Comedy,57176582 You Only Live Twice,1967,Adventure,43100000 Amour,2012,Drama,225377 Poltergeist III,1988,Horror,14114488 "It's a Mad, Mad, Mad, Mad World",1963,Comedy,46300000 Richard III,1995,War,2600000 Melancholia,2011,Drama,3029870 Jab Tak Hai Jaan,2012,Drama,3047539 Alien,1979,Sci-Fi,78900000 The Texas Chain Saw Massacre,1974,Horror,30859000 The Runaways,2010,Music,3571735 Fiddler on the Roof,1971,Romance,50000000 Thunderball,1965,Adventure,63600000 Set It Off,1996,Action,36049108 The Best Man,1999,Drama,34074895 Child's Play,1988,Horror,33244684 Sicko,2007,Drama,24530513 The Purge: Anarchy,2014,Horror,71519230 Down to You,2000,Romance,20035310 Harold & Kumar Go to White Castle,2004,Adventure,18225165 The Contender,2000,Drama,17804273 Boiler Room,2000,Thriller,16938179 Black Christmas,2006,Horror,16235293 Henry V,1989,War,10161099 The Way of the Gun,2000,Action,6047856 Igby Goes Down,2002,Drama,4681503 PCU,1994,Comedy,4350774 Gracie,2007,Drama,2955039 Trust the Man,2005,Romance,1530535 Hamlet 2,2008,Comedy,4881867 Glee: The 3D Concert Movie,2011,Music,11860839 The Legend 
of Suriyothai,2001,Adventure,454255 Two Evil Eyes,1990,Horror,349618 All or Nothing,2002,Drama,112935 Princess Kaiulani,2009,Drama,883887 Opal Dream,2006,Drama,13751 Flame and Citron,2008,Drama,145109 Undiscovered,2005,Comedy,1046166 Crocodile Dundee,1986,Comedy,174635000 Awake,2007,Crime,14373825 Skin Trade,2014,Action,162 Crazy Heart,2009,Drama,39462438 The Rose,1979,Romance,29200000 Baggage Claim,2013,Comedy,21564616 Election,1999,Drama,14879556 The DUFF,2015,Comedy,34017854 Glitter,2001,Drama,4273372 Bright Star,2009,Drama,4440055 My Name Is Khan,2010,Drama,4018695 Footloose,1984,Romance,80000000 Limbo,1999,Adventure,1997807 The Karate Kid,1984,Drama,90800000 Repo! The Genetic Opera,2008,Musical,140244 Pulp Fiction,1994,Drama,107930000 Nightcrawler,2014,Thriller,32279955 Club Dread,2004,Thriller,4992159 The Sound of Music,1965,Family,163214286 Splash,1984,Fantasy,69800000 Little Miss Sunshine,2006,Comedy,59889948 Stand by Me,1986,Adventure,52287414 28 Days Later...,2002,Drama,45063889 You Got Served,2004,Drama,40066497 Escape from Alcatraz,1979,Biography,36500000 Brown Sugar,2002,Comedy,27362712 A Thin Line Between Love and Hate,1996,Comedy,34746109 50/50,2011,Romance,34963967 Shutter,2008,Horror,25926543 That Awkward Moment,2014,Romance,26049082 Much Ado About Nothing,1993,Drama,22551000 On Her Majesty's Secret Service,1969,Adventure,22800000 New Nightmare,1994,Fantasy,18090181 Drive Me Crazy,1999,Comedy,17843379 Half Baked,1998,Crime,17278980 New in Town,2009,Comedy,16699684 Syriana,2005,Thriller,50815288 American Psycho,2000,Crime,15047419 The Good Girl,2002,Romance,14015786 The Boondock Saints II: All Saints Day,2009,Crime,10269307 Enough Said,2013,Comedy,17536788 Easy A,2010,Romance,58401464 Shadow of the Vampire,2000,Horror,8279017 Prom,2011,Drama,10106233 Held Up,1999,Comedy,4692814 Woman on Top,2000,Comedy,5018450 Anomalisa,2015,Animation,3442820 Another Year,2010,Comedy,3205244 8 Women,2002,Romance,3076425 Showdown in Little Tokyo,1991,Thriller,2275557 
Clay Pigeons,1998,Crime,1789892 It's Kind of a Funny Story,2010,Comedy,6350058 Made in Dagenham,2010,History,1094798 When Did You Last See Your Father?,2007,Biography,1071240 Prefontaine,1997,Biography,532190 The Secret of Kells,2009,Animation,686383 Begin Again,2013,Drama,16168741 Down in the Valley,2005,Drama,568695 Brooklyn Rules,2007,Crime,398420 The Singing Detective,2003,Comedy,336456 Fido,2006,Horror,298110 The Wendell Baker Story,2005,Comedy,127144 Wild Target,2010,Crime,117190 Pathology,2008,Horror,108662 10th & Wolf,2006,Thriller,53481 Dear Wendy,2004,Romance,23106 Akira,1988,Sci-Fi,439162 Imagine Me & You,2005,Comedy,671240 The Blood of Heroes,1989,Sci-Fi,882290 Driving Miss Daisy,1989,Drama,106593296 Soul Food,1997,Comedy,43490057 Rumble in the Bronx,1995,Action,32333860 Thank You for Smoking,2005,Comedy,24792061 Hostel: Part II,2007,Horror,17544812 An Education,2009,Drama,12574715 The Hotel New Hampshire,1984,Drama,5100000 Narc,2002,Mystery,10460089 Men with Brooms,2002,Romance,4239767 Witless Protection,2008,Crime,4131640 Extract,2009,Crime,10814185 Code 46,2003,Thriller,197148 Crash,2004,Thriller,54557348 Albert Nobbs,2011,Drama,3014541 Persepolis,2007,War,4443403 The Neon Demon,2016,Thriller,1330827 Harry Brown,2009,Action,1818681 Spider-Man 3,2007,Romance,336530303 The Omega Code,1999,Action,12610552 Juno,2007,Drama,143492840 Diamonds Are Forever,1971,Adventure,43800000 The Godfather,1972,Drama,134821952 Flashdance,1983,Music,94900000 500 Days of Summer,2009,Comedy,32391374 The Piano,1993,Drama,40158000 Magic Mike,2012,Comedy,113709992 Darkness Falls,2003,Thriller,32131483 Live and Let Die,1973,Action,35400000 My Dog Skip,2000,Family,34099640 Jumping the Broom,2011,Drama,37295394 The Great Gatsby,2013,Drama,144812796 "Good Night, and Good Luck.",2005,Drama,31501218 Capote,2005,Biography,28747570 Desperado,1995,Thriller,25625110 The Claim,2000,Western,403932 Logan's Run,1976,Sci-Fi,25000000 The Man with the Golden Gun,1974,Adventure,21000000 Action 
Jackson,1988,Comedy,20257000 The Descent,2005,Horror,26005908 Devil's Due,2014,Horror,15818967 Flirting with Disaster,1996,Comedy,14891000 The Devil's Rejects,2005,Crime,16901126 Dope,2015,Drama,17474107 In Too Deep,1999,Drama,14003141 Skyfall,2012,Thriller,304360277 House of 1000 Corpses,2003,Horror,12583510 A Serious Man,2009,Comedy,9190525 Get Low,2009,Mystery,9176553 Warlock,1989,Horror,9094451 A Single Man,2009,Drama,9166863 The Last Temptation of Christ,1988,Drama,8373585 Outside Providence,1999,Romance,7292175 Bride & Prejudice,2004,Musical,6601079 Rabbit-Proof Fence,2002,Biography,6165429 Who's Your Caddy?,2007,Comedy,5694308 Split Second,1992,Crime,5430822 The Other Side of Heaven,2001,Drama,4720371 Redbelt,2008,Sport,2344847 Cyrus,2010,Drama,7455447 A Dog of Flanders,1999,Family,2148212 Auto Focus,2002,Drama,2062066 Factory Girl,2006,Drama,1654367 We Need to Talk About Kevin,2011,Drama,1738692 The Mighty Macs,2009,Sport,1889522 Mother and Child,2009,Drama,1110286 March or Die,1977,Drama,1000000 Les visiteurs,1993,Comedy,700000 Somewhere,2010,Drama,1768416 Chairman of the Board,1998,Comedy,306715 Hesher,2010,Drama,382946 The Heart of Me,2002,Romance,196067 Freeheld,2015,Biography,532988 The Extra Man,2010,Comedy,453079 Ca$h,2010,Crime,46451 Wah-Wah,2005,Drama,233103 Pale Rider,1985,Western,41400000 Dazed and Confused,1993,Comedy,7993039 The Chumscrubber,2005,Comedy,49526 Shade,2003,Thriller,10696 House at the End of the Street,2012,Horror,31607598 Incendies,2010,Drama,6857096 "Remember Me, My Love",2003,Romance,223878 Elite Squad,2007,Crime,8060 Annabelle,2014,Horror,84263837 Bran Nue Dae,2009,Musical,110029 Boyz n the Hood,1991,Drama,57504069 La Bamba,1987,Music,54215416 Dressed to Kill,1980,Romance,31899000 The Adventures of Huck Finn,1993,Family,24103594 Go,1999,Comedy,16842303 Friends with Money,2006,Comedy,13367101 Bats,1999,Thriller,10149779 Nowhere in Africa,2001,Biography,6173485 Layer Cake,2004,Drama,2338695 The Work and the Glory II: American 
Zion,2005,Drama,2024854 The East,2013,Drama,2268296 A Home at the End of the World,2004,Romance,1029017 The Messenger,2009,Drama,66637 Control,2007,Biography,871577 The Terminator,1984,Sci-Fi,38400000 Good Bye Lenin!,2003,Drama,4063859 The Damned United,2009,Drama,449558 Mallrats,1995,Romance,2122561 Grease,1978,Romance,181360000 Platoon,1986,War,137963328 Fahrenheit 9/11,2004,Drama,119078393 Butch Cassidy and the Sundance Kid,1969,Biography,102308900 Mary Poppins,1964,Comedy,102300000 Ordinary People,1980,Drama,54800000 Around the World in 80 Days,2004,Comedy,24004159 West Side Story,1961,Romance,43650000 Caddyshack,1980,Comedy,39800000 The Brothers,2001,Drama,27457409 The Wood,1999,Romance,25047631 The Usual Suspects,1995,Crime,23272306 A Nightmare on Elm Street 5: The Dream Child,1989,Thriller,22168359 Van Wilder: Party Liaison,2002,Romance,21005329 The Wrestler,2008,Drama,26236603 Duel in the Sun,1946,Western,20400000 Best in Show,2000,Comedy,18621249 Escape from New York,1981,Sci-Fi,25244700 School Daze,1988,Comedy,14545844 Daddy Day Camp,2007,Comedy,13235267 Mystic Pizza,1988,Drama,12793213 Sliding Doors,1998,Drama,11883495 Tales from the Hood,1995,Horror,11797927 The Last King of Scotland,2006,Biography,17605861 Halloween 5,1989,Thriller,11642254 Bernie,2011,Crime,9203192 Pollock,2000,Biography,8596914 200 Cigarettes,1999,Drama,6851636 The Words,2012,Mystery,11434867 Casa de mi Padre,2012,Western,5895238 City Island,2009,Drama,6670712 The Guard,2011,Comedy,5359774 College,2008,Comedy,4693919 The Virgin Suicides,1999,Drama,4859475 Miss March,2009,Romance,4542775 Wish I Was Here,2014,Drama,3588432 Simply Irresistible,1999,Romance,4394936 Hedwig and the Angry Inch,2001,Music,3029081 Only the Strong,1993,Action,3273588 Shattered Glass,2003,Drama,2207975 Novocaine,2001,Comedy,2025238 The Wackness,2008,Romance,2077046 Beastmaster 2: Through the Portal of Time,1991,Fantasy,869325 The 5th Quarter,2010,Sport,399611 The Greatest,2009,Romance,115862 Come Early 
Morning,2006,Romance,117560 Lucky Break,2001,Romance,54606 "Surfer, Dude",2008,Comedy,36497 Deadfall,2012,Crime,65804 L'auberge espagnole,2002,Comedy,3895664 Murder by Numbers,2002,Crime,31874869 Winter in Wartime,2008,Drama,542860 The Protector,2005,Drama,11905519 Bend It Like Beckham,2002,Sport,32541719 Sunshine State,2002,Drama,3064356 Crossover,2006,Action,7009668 [Rec] 2,2009,Horror,27024 The Sting,1973,Drama,159600000 Chariots of Fire,1981,Drama,58800000 Diary of a Mad Black Woman,2005,Comedy,50382128 Shine,1996,Romance,35811509 Don Jon,2013,Romance,24475193 Ghost World,2001,Comedy,6200756 Iris,2001,Romance,1292119 The Chorus,2004,Drama,3629758 Mambo Italiano,2003,Comedy,6239558 Wonderland,2003,Thriller,1056102 Do the Right Thing,1989,Drama,27545445 Harvard Man,2001,Thriller,56007 Le Havre,2011,Comedy,611709 R100,2013,Drama,22770 Salvation Boulevard,2011,Action,27445 The Ten,2007,Romance,766487 Headhunters,2011,Drama,1196752 Saint Ralph,2004,Sport,795126 Insidious: Chapter 2,2013,Horror,83574831 Saw II,2005,Mystery,87025093 10 Cloverfield Lane,2016,Thriller,71897215 Jackass: The Movie,2002,Comedy,64267897 Lights Out,2016,Horror,56536016 Paranormal Activity 3,2011,Horror,104007828 Ouija,2014,Fantasy,50820940 A Nightmare on Elm Street 3: Dream Warriors,1987,Action,44793200 The Gift,2015,Mystery,43771291 Instructions Not Included,2013,Drama,44456509 Paranormal Activity 4,2012,Horror,53884821 The Robe,1953,History,36000000 Freddy's Dead: The Final Nightmare,1991,Thriller,34872293 Monster,2003,Crime,34468224 Paranormal Activity: The Marked Ones,2014,Thriller,32453345 Dallas Buyers Club,2013,Drama,27296514 The Lazarus Effect,2015,Sci-Fi,25799043 Memento,2000,Mystery,25530884 Oculus,2013,Horror,27689474 Clerks II,2006,Comedy,24138847 Billy Elliot,2000,Drama,21994911 The Way Way Back,2013,Drama,21501098 House Party 2,1991,Romance,19281235 Doug's 1st Movie,1999,Comedy,19421271 The Apostle,1997,Drama,20733485 Our Idiot Brother,2011,Comedy,24809547 The Players 
Club,1998,Drama,23031390 O,2001,Thriller,16017403 "As Above, So Below",2014,Horror,21197315 Addicted,2014,Drama,17382982 Eve's Bayou,1997,Drama,14821531 Still Alice,2014,Drama,18656400 Friday the 13th Part VIII: Jason Takes Manhattan,1989,Horror,14343976 My Big Fat Greek Wedding,2002,Romance,241437427 Spring Breakers,2012,Drama,14123773 Halloween: The Curse of Michael Myers,1995,Thriller,15126948 Y Tu Mamá También,2001,Adventure,13622333 Shaun of the Dead,2004,Horror,13464388 The Haunting of Molly Hartley,2008,Drama,13350177 Lone Star,1996,Mystery,13269963 Halloween 4: The Return of Michael Myers,1988,Horror,17768000 April Fool's Day,1986,Horror,12947763 Diner,1982,Comedy,14100000 Lone Wolf McQuade,1983,Action,12200000 Apollo 18,2011,Horror,17683670 Sunshine Cleaning,2008,Comedy,12055108 No Escape,2015,Action,27285953 Not Easily Broken,2009,Drama,10572742 Digimon: The Movie,2000,Sci-Fi,9628751 Saved!,2004,Drama,8786715 The Barbarian Invasions,2003,Romance,3432342 The Forsaken,2001,Thriller,6755271 UHF,1989,Drama,6157157 Slums of Beverly Hills,1998,Drama,5480318 Made,2001,Crime,5308707 Moon,2009,Mystery,5009677 The Sweet Hereafter,1997,Drama,4306697 Of Gods and Men,2010,Drama,3950029 Bottle Shock,2008,Drama,4040588 Heavenly Creatures,1994,Drama,3049135 90 Minutes in Heaven,2015,Drama,4700361 Everything Must Go,2010,Comedy,2711210 Zero Effect,1998,Comedy,1980338 The Machinist,2004,Thriller,1082044 Light Sleeper,1992,Drama,1100000 Kill the Messenger,2014,Drama,2445646 Rabbit Hole,2010,Drama,2221809 Party Monster,2003,Thriller,296665 Green Room,2015,Thriller,3219029 Bottle Rocket,1996,Drama,1040879 Albino Alligator,1996,Thriller,326308 "Lovely, Still",2008,Drama,124720 Desert Blue,1998,Drama,99147 Redacted,2007,Crime,65087 Fascination,2004,Thriller,16066 I Served the King of England,2006,Comedy,617228 Sling Blade,1996,Drama,24475416 Hostel,2005,Horror,47277326 Tristram Shandy: A Cock and Bull Story,2005,Drama,1247453 Take Shelter,2011,Thriller,1729969 Lady in 
White,1988,Mystery,1705139 The Texas Chainsaw Massacre 2,1986,Horror,8025872 Only God Forgives,2013,Drama,778565 The Names of Love,2010,Comedy,513836 Savage Grace,2007,Drama,434417 Police Academy,1984,Comedy,81200000 Four Weddings and a Funeral,1994,Romance,52700832 25th Hour,2002,Drama,13060843 Bound,1996,Thriller,3798532 Requiem for a Dream,2000,Drama,3609278 Tango,1998,Musical,1687311 Donnie Darko,2001,Thriller,727883 Character,1997,Mystery,713413 Spun,2002,Drama,410241 Lady Vengeance,2005,Crime,211667 Mean Machine,2001,Drama,92191 Exiled,2006,Action,49413 After.Life,2009,Horror,108229 One Flew Over the Cuckoo's Nest,1975,Drama,112000000 The Sweeney,2012,Action,26345 Whale Rider,2002,Drama,20772796 Pan,2015,Adventure,34964818 Night Watch,2004,Fantasy,1487477 The Crying Game,1992,Thriller,62549000 Porky's,1981,Comedy,105500000 Survival of the Dead,2009,Horror,101055 Lost in Translation,2003,Drama,44566004 Annie Hall,1977,Romance,39200000 The Greatest Show on Earth,1952,Romance,36000000 Exodus: Gods and Kings,2014,Adventure,65007045 Monster's Ball,2001,Romance,31252964 Maggie,2015,Drama,131175 Leaving Las Vegas,1995,Drama,31968347 The Boy Next Door,2015,Thriller,35385560 The Kids Are All Right,2010,Comedy,20803237 They Live,1988,Thriller,13008928 The Last Exorcism Part II,2013,Horror,15152879 Boyhood,2014,Drama,25359200 Scoop,2006,Comedy,10515579 Planet of the Apes,2001,Adventure,180011740 The Wash,2001,Comedy,10097096 3 Strikes,2000,Comedy,9821335 The Cooler,2003,Romance,8243880 The Night Listener,2006,Mystery,7825820 My Soul to Take,2010,Mystery,14637490 The Orphanage,2007,Thriller,7159147 A Haunted House 2,2014,Comedy,17314483 The Rules of Attraction,2002,Comedy,6525762 Four Rooms,1995,Comedy,4301331 Secretary,2002,Comedy,4046737 The Real Cancun,2003,Documentary,3713002 Talk Radio,1988,Drama,3468572 Waiting for Guffman,1996,Comedy,2892582 Love Stinks,1999,Comedy,2800000 You Kill Me,2007,Crime,2426851 Thumbsucker,2005,Comedy,1325073 
Mirrormask,2005,Adventure,864959 Samsara,2011,Music,2601847 The Barbarians,1987,Adventure,800000 Poolhall Junkies,2002,Drama,562059 The Loss of Sexual Innocence,1999,Drama,399793 Joe,2013,Drama,371897 Shooting Fish,1997,Crime,302204 Prison,1987,Crime,354704 Psycho Beach Party,2000,Mystery,265107 The Big Tease,1999,Comedy,185577 Trust,2010,Crime,58214 An Everlasting Piece,2000,Comedy,75078 Adore,2013,Drama,317125 Mondays in the Sun,2002,Drama,146402 Stake Land,2010,Sci-Fi,18469 The Last Time I Committed Suicide,1997,Drama,12836 Futuro Beach,2014,Drama,20262 Gone with the Wind,1939,War,198655278 Desert Dancer,2014,Drama,143653 Major Dundee,1965,Adventure,14873 Annie Get Your Gun,1950,Romance,8000000 Defendor,2009,Drama,37606 The Pirate,1948,Musical,2956000 The Good Heart,2009,Drama,19959 The History Boys,2006,Comedy,2706659 Unknown,2011,Action,61094903 The Full Monty,1997,Music,45857453 Airplane!,1980,Comedy,83400000 Friday,1995,Drama,27900000 Menace II Society,1993,Drama,27900000 Creepshow 2,1987,Horror,14000000 The Witch,2015,Mystery,25138292 I Got the Hook Up,1998,Comedy,10305534 She's the One,1996,Romance,9449219 Gods and Monsters,1998,Biography,6390032 The Secret in Their Eyes,2009,Mystery,20167424 Evil Dead II,1987,Horror,5923044 Pootie Tang,2001,Musical,3293258 La otra conquista,1998,History,886410 Trollhunter,2010,Horror,252652 Ira & Abby,2006,Romance,220234 The Watch,2012,Sci-Fi,34350553 Winter Passing,2005,Comedy,101228 D.E.B.S.,2004,Romance,96793 March of the Penguins,2005,Documentary,77413017 Margin Call,2011,Biography,5354039 Choke,2008,Drama,2926565 Whiplash,2014,Drama,13092000 City of God,2002,Drama,7563397 Human Traffic,1999,Music,104257 The Hunt,2012,Drama,610968 Bella,2006,Romance,8108247 Maria Full of Grace,2004,Drama,6517198 Beginners,2010,Drama,5776314 Animal House,1978,Comedy,141600000 Goldfinger,1964,Thriller,51100000 Trainspotting,1996,Drama,16501785 The Original Kings of Comedy,2000,Documentary,38168022 Paranormal Activity 
2,2010,Horror,84749884 Waking Ned Devine,1998,Comedy,24788807 Bowling for Columbine,2002,Drama,21244913 A Nightmare on Elm Street 2: Freddy's Revenge,1985,Fantasy,30000000 A Room with a View,1985,Romance,20966644 The Purge,2013,Horror,64423650 Sinister,2012,Horror,48056940 Martin Lawrence Live: Runteldat,2002,Comedy,19184015 Air Bud,1997,Comedy,24629916 Jason Lives: Friday the 13th Part VI,1986,Horror,19472057 The Bridge on the River Kwai,1957,War,27200000 Spaced Invaders,1990,Adventure,15369573 Jason Goes to Hell: The Final Friday,1993,Fantasy,15935068 Dave Chappelle's Block Party,2005,Documentary,11694528 Next Day Air,2009,Comedy,10017041 Phat Girlz,2006,Comedy,7059537 Before Midnight,2013,Romance,8114507 Teen Wolf Too,1987,Fantasy,7888703 Phantasm II,1988,Sci-Fi,7282851 Real Women Have Curves,2002,Comedy,5844929 East Is East,1999,Drama,4170647 Whipped,2000,Comedy,4142507 Kama Sutra: A Tale of Love,1996,Crime,4109095 Warlock: The Armageddon,1993,Fantasy,3902679 8 Heads in a Duffel Bag,1997,Crime,3559990 Thirteen Conversations About One Thing,2001,Drama,3287435 Jawbreaker,1999,Thriller,3071947 Basquiat,1996,Biography,2961991 Tsotsi,2005,Drama,2912363 DysFunktional Family,2003,Comedy,2223990 Tusk,2014,Horror,1821983 Oldboy,2003,Thriller,2181290 Letters to God,2010,Family,2848578 Hobo with a Shotgun,2011,Action,703002 Bachelorette,2012,Romance,418268 Tim and Eric's Billion Dollar Movie,2012,Comedy,200803 The Gambler,2014,Thriller,33631221 Summer Storm,2004,Sport,95016 Chain Letter,2009,Horror,143000 Just Looking,1999,Drama,39852 The Divide,2011,Thriller,22000 Alice in Wonderland,2010,Fantasy,334185206 Cinderella,2015,Fantasy,201148159 Central Station,1998,Drama,5595428 Boynton Beach Club,2005,Romance,3123749 High Tension,2003,Horror,3645438 Hustle & Flow,2005,Crime,22201636 Some Like It Hot,1959,Romance,25000000 Friday the 13th Part VII: The New Blood,1988,Horror,19170001 The Wizard of Oz,1939,Fantasy,22202612 Young Frankenstein,1974,Comedy,86300000 Diary of the 
Dead,2007,Horror,952620 Ulee's Gold,1997,Drama,9054736 Blazing Saddles,1974,Western,119500000 Friday the 13th: The Final Chapter,1984,Thriller,32600000 Maurice,1987,Romance,3130592 The Astronaut's Wife,1999,Thriller,10654581 Timecrimes,2007,Sci-Fi,38108 A Haunted House,2013,Fantasy,40041683 2016: Obama's America,2012,Documentary,33349949 Halloween II,2009,Horror,33386128 That Thing You Do!,1996,Comedy,25809813 Halloween III: Season of the Witch,1982,Mystery,14400000 Kevin Hart: Let Me Explain,2013,Comedy,32230907 My Own Private Idaho,1991,Drama,6401336 Garden State,2004,Comedy,26781723 Before Sunrise,1995,Romance,5400000 Jesus' Son,1999,Drama,1282084 Robot & Frank,2012,Crime,3325638 My Life Without Me,2003,Romance,395592 The Spectacular Now,2013,Comedy,6851969 Religulous,2008,Comedy,12995673 Fuel,2008,Documentary,173783 Dodgeball: A True Underdog Story,2004,Sport,114324072 Eye of the Dolphin,2006,Family,71904 8: The Mormon Proposition,2010,Documentary,99851 The Other End of the Line,2008,Drama,115504 Anatomy,2000,Horror,5725 Sleep Dealer,2008,Thriller,75727 Super,2010,Drama,322157 Get on the Bus,1996,Drama,5731103 Thr3e,2006,Drama,978908 This Is England,2006,Crime,327919 Go for It!,2011,Musical,178739 Friday the 13th Part III,1982,Thriller,36200000 Friday the 13th: A New Beginning,1985,Thriller,21300000 The Last Sin Eater,2007,Drama,379643 The Best Years of Our Lives,1946,Drama,23650000 Elling,2001,Comedy,313436 From Russia with Love,1963,Thriller,24800000 The Toxic Avenger Part II,1989,Comedy,792966 It Follows,2014,Horror,14673301 Mad Max 2: The Road Warrior,1981,Action,9003011 The Legend of Drunken Master,1994,Comedy,11546543 Boys Don't Cry,1999,Crime,11533945 Silent House,2011,Drama,12555230 The Lives of Others,2006,Thriller,11284657 Courageous,2011,Drama,34522221 The Triplets of Belleville,2003,Animation,7002255 Smoke Signals,1998,Comedy,6719300 Before Sunset,2004,Drama,5792822 Amores Perros,2000,Thriller,5383834 Thirteen,2003,Drama,4599680 Winter's 
Bone,2010,Drama,6531491 Me and You and Everyone We Know,2005,Comedy,3885134 We Are Your Friends,2015,Drama,3590010 Harsh Times,2005,Thriller,3335839 Captive,2015,Thriller,2557668 Full Frontal,2002,Romance,2506446 Witchboard,1986,Thriller,7369373 Hamlet,1996,Drama,4414535 Shortbus,2006,Drama,1984378 Waltz with Bashir,2008,Documentary,2283276 "The Book of Mormon Movie, Volume 1: The Journey",2003,Adventure,1098224 The Diary of a Teenage Girl,2015,Drama,1477002 In the Shadow of the Moon,2007,History,1134049 The Virginity Hit,2010,Comedy,535249 House of D,2004,Comedy,371081 Six-String Samurai,1998,Drama,124494 Saint John of Las Vegas,2009,Drama,100669 Stonewall,2015,Drama,186354 London,2005,Drama,12667 Sherrybaby,2006,Drama,198407 Stealing Harvard,2002,Crime,13973532 Gangster's Paradise: Jerusalema,2008,Drama,4958 The Lady from Shanghai,1947,Crime,7927 The Ghastly Love of Johnny X,2012,Comedy,2436 River's Edge,1986,Drama,4600000 Northfork,2003,Drama,1420578 Buried,2010,Drama,1028658 One to Another,2006,Drama,18435 Carrie,2013,Fantasy,35266619 A Nightmare on Elm Street,1984,Horror,26505000 Man on Wire,2008,Crime,2957978 Brotherly Love,2015,Drama,444044 The Last Exorcism,2010,Horror,40990055 El crimen del padre Amaro,2002,Drama,5709616 Beasts of the Southern Wild,2012,Drama,12784397 Songcatcher,2000,Music,3050934 Run Lola Run,1998,Crime,7267324 May,2002,Horror,145540 In the Bedroom,2001,Drama,35918429 I Spit on Your Grave,2010,Horror,92401 "Happy, Texas",1999,Crime,1943649 My Summer of Love,2004,Drama,992238 The Lunchbox,2013,Drama,4231500 Yes,2004,Drama,396035 Caramel,2007,Romance,1060591 Mississippi Mermaid,1969,Drama,26893 I Love Your Work,2003,Mystery,2580 Dawn of the Dead,2004,Thriller,58885635 Waitress,2007,Drama,19067631 Bloodsport,1988,Drama,11806119 The Squid and the Whale,2005,Drama,7362100 Kissing Jessica Stein,2001,Comedy,7022940 Exotica,1994,Romance,5132222 Buffalo '66,1998,Comedy,2365931 Insidious,2010,Horror,53991137 Nine Queens,2000,Drama,1221261 The 
Ballad of Jack and Rose,2005,Drama,712294 The To Do List,2013,Comedy,3447339 Killing Zoe,1993,Thriller,418953 The Believer,2001,Drama,406035 Session 9,2001,Horror,373967 I Want Someone to Eat Cheese With,2006,Romance,194568 Modern Times,1936,Drama,163245 Stolen Summer,2002,Drama,119841 My Name Is Bruce,2007,Fantasy,173066 Pontypool,2008,Fantasy,3478 Trucker,2008,Drama,52166 The Lords of Salem,2012,Drama,1163508 Jack Reacher,2012,Crime,80033643 Snow White and the Seven Dwarfs,1937,Musical,184925485 The Holy Girl,2004,Drama,304124 Incident at Loch Ness,2004,Comedy,36830 "Lock, Stock and Two Smoking Barrels",1998,Crime,3650677 The Celebration,1998,Drama,1647780 Trees Lounge,1996,Drama,695229 Journey from the Fall,2006,Drama,638951 The Basket,1999,Drama,609042 Mercury Rising,1998,Crime,32940507 The Hebrew Hammer,2003,Comedy,19539 Friday the 13th Part 2,1981,Mystery,19100000 "Sex, Lies, and Videotape",1989,Drama,24741700 Saw,2004,Mystery,55153403 Super Troopers,2001,Comedy,18488314 The Day the Earth Stood Still,2008,Sci-Fi,79363785 Monsoon Wedding,2001,Comedy,13876974 You Can Count on Me,2000,Drama,9180275 Lucky Number Slevin,2006,Crime,22494487 But I'm a Cheerleader,1999,Comedy,2199853 Home Run,2013,Sport,2859955 Reservoir Dogs,1992,Crime,2812029 "The Good, the Bad and the Ugly",1966,Western,6100000 The Second Mother,2015,Comedy,375723 Blue Like Jazz,2012,Drama,594904 Down and Out with the Dolls,2001,Music,58936 Airborne,1993,Adventure,2850263 Waiting...,2005,Comedy,16101109 From a Whisper to a Scream,1987,Horror,1400000 Beyond the Black Rainbow,2010,Sci-Fi,56129 The Raid: Redemption,2011,Thriller,4105123 Rocky,1976,Drama,117235247 The Fog,1980,Horror,21378000 Unfriended,2014,Thriller,31537320 The Howling,1981,Horror,17986000 Dr. 
No,1962,Action,16067035 Chernobyl Diaries,2012,Thriller,18112929 Hellraiser,1987,Horror,14564027 God's Not Dead 2,2016,Drama,20773070 Cry_Wolf,2005,Mystery,10042266 Godzilla 2000,1999,Thriller,10037390 Blue Valentine,2010,Romance,9701559 Transamerica,2005,Adventure,9013113 The Devil Inside,2012,Horror,53245055 Beyond the Valley of the Dolls,1970,Music,9000000 The Green Inferno,2013,Horror,7186670 The Sessions,2012,Romance,5997134 Next Stop Wonderland,1998,Romance,3386698 Juno,2007,Comedy,143492840 Frozen River,2008,Drama,2508841 20 Feet from Stardom,2013,Documentary,4946250 Two Girls and a Guy,1997,Drama,1950218 Walking and Talking,1996,Comedy,1277257 The Full Monty,1997,Comedy,45857453 Who Killed the Electric Car?,2006,Documentary,1677838 The Broken Hearts Club: A Romantic Comedy,2000,Sport,1744858 Goosebumps,2015,Horror,80021740 Slam,1998,Drama,982214 Brigham City,2001,Crime,798341 All the Real Girls,2003,Romance,548712 Dream with the Fishes,1997,Drama,464655 Blue Car,2002,Drama,464126 Wristcutters: A Love Story,2006,Drama,104077 The Battle of Shaker Heights,2003,Comedy,279282 The Lovely Bones,2009,Fantasy,43982842 The Act of Killing,2012,Documentary,484221 Taxi to the Dark Side,2007,Crime,274661 Once in a Lifetime: The Extraordinary Story of the New York Cosmos,2006,Sport,144431 Antarctica: A Year on Ice,2013,Biography,287761 Hardflip,2012,Action,96734 The House of the Devil,2009,Horror,100659 The Perfect Host,2010,Comedy,48430 Safe Men,1998,Comedy,21210 The Specials,2000,Comedy,12996 Alone with Her,2006,Crime,10018 Creative Control,2015,Drama,62480 Special,2006,Drama,6387 In Her Line of Fire,2006,Drama,721 The Jimmy Show,2001,Drama,703 Trance,2013,Mystery,2319187 On the Waterfront,1954,Romance,9600000 L!fe Happens,2011,Comedy,20186 "4 Months, 3 Weeks and 2 Days",2007,Drama,1185783 Hard Candy,2005,Thriller,1007962 The Quiet,2005,Drama,381186 Fruitvale Station,2013,Romance,16097842 The Brass Teapot,2012,Fantasy,6643 Snitch,2013,Action,42919096 Latter 
Days,2003,Drama,819939 "For a Good Time, Call...",2012,Comedy,1243961 Time Changer,2002,Fantasy,15278 A Separation,2011,Mystery,7098492 Welcome to the Dollhouse,1995,Comedy,4771000 Ruby in Paradise,1993,Romance,1001437 Raising Victor Vargas,2002,Drama,2073984 Deterrence,1999,Drama,144583 Dead Snow,2009,Comedy,41709 American Graffiti,1973,Drama,115000000 Aqua Teen Hunger Force Colon Movie Film for Theaters,2007,Sci-Fi,5518918 Safety Not Guaranteed,2012,Comedy,4007792 Kill List,2011,Crime,26297 The Innkeepers,2011,Horror,77501 The Unborn,2009,Fantasy,42638165 Interview with the Assassin,2002,Drama,47329 Donkey Punch,2008,Drama,18378 Hoop Dreams,1994,Sport,7830611 King Kong,2005,Action,218051260 House of Wax,2005,Horror,32048809 Half Nelson,2006,Drama,2694973 Top Hat,1935,Musical,3000000 The Blair Witch Project,1999,Horror,140530114 Woodstock,1970,Documentary,13300000 Mercy Streets,2000,Drama,171988 Broken Vessels,1998,Drama,13493 A Hard Day's Night,1964,Musical,515005 Fireproof,2008,Romance,33451479 Benji,1974,Adventure,39552600 Open Water,2003,Drama,30500882 Kingdom of the Spiders,1977,Horror,17000000 The Station Agent,2003,Comedy,5739376 To Save a Life,2009,Drama,3773863 Beyond the Mat,1999,Documentary,2047570 Osama,2003,Drama,1127331 Sholem Aleichem: Laughing in the Darkness,2011,Documentary,906666 Groove,2000,Music,1114943 Twin Falls Idaho,1999,Drama,985341 Mean Creek,2004,Drama,603943 Hurricane Streets,1997,Drama,334041 Never Again,2001,Comedy,295468 Civil Brand,2002,Crime,243347 Lonesome Jim,2005,Comedy,154077 Seven Samurai,1954,Drama,269061 Finishing the Game: The Search for a New Bruce Lee,2007,Comedy,52850 Rubber,2010,Comedy,98017 Home,2015,Adventure,177343675 Kiss the Bride,2007,Romance,31937 The Slaughter Rule,2002,Drama,13134 Monsters,2010,Thriller,237301 Detention of the Dead,2012,Horror,1332 Crossroads,2002,Drama,37188667 Oz the Great and Powerful,2013,Adventure,234903076 Straight Out of Brooklyn,1991,Drama,2712293 Bloody Sunday,2002,History,768045 
Conversations with Other Women,2005,Drama,379122 Poultrygeist: Night of the Chicken Dead,2006,Comedy,23000 42nd Street,1933,Comedy,2300000 Metropolitan,1990,Drama,2938208 Napoleon Dynamite,2004,Comedy,44540956 Blue Ruin,2013,Drama,258113 Paranormal Activity,2007,Horror,107917283 Monty Python and the Holy Grail,1975,Fantasy,1229197 Quinceañera,2006,Drama,1689999 Tarnation,2003,Documentary,592014 The Beyond,1981,Horror,126387 What Happens in Vegas,2008,Comedy,80276912 The Broadway Melody,1929,Musical,2808000 Maniac,2012,Horror,12843 Murderball,2005,Documentary,1523883 American Ninja 2: The Confrontation,1987,Action,4000000 Halloween,1978,Thriller,47000000 Tumbleweeds,1999,Drama,1281176 The Prophecy,1995,Thriller,16115878 When the Cat's Away,1996,Comedy,1652472 Pieces of April,2003,Drama,2360184 Old Joy,2006,Drama,255352 Wendy and Lucy,2008,Drama,856942 Fighting Tommy Riley,2004,Drama,5199 Across the Universe,2007,Musical,24343673 Locker 13,2014,Thriller,2468 Compliance,2012,Crime,318622 Chasing Amy,1997,Comedy,12006514 Lovely & Amazing,2001,Drama,4186931 Better Luck Tomorrow,2002,Romance,3799339 The Incredibly True Adventure of Two Girls in Love,1995,Comedy,1977544 Chuck & Buck,2000,Drama,1050600 American Desi,2001,Comedy,902835 Cube,1997,Mystery,489220 I Married a Strange Person!,1997,Animation,203134 November,2004,Drama,191309 Like Crazy,2011,Romance,3388210 The Canyons,2013,Thriller,49494 Burn,2012,Documentary,111300 Urbania,2000,Drama,1027119 "The Beast from 20,000 Fathoms",1953,Horror,5000000 Swingers,1996,Comedy,4505922 A Fistful of Dollars,1964,Drama,3500000 Side Effects,2013,Drama,32154410 The Trials of Darryl Hunt,2006,Documentary,1111 Children of Heaven,1997,Family,925402 Weekend,2011,Romance,469947 She's Gotta Have It,1986,Comedy,7137502 Another Earth,2011,Romance,1316074 Sweet Sweetback's Baadasssss Song,1971,Thriller,15180000 Tadpole,2000,Romance,2882062 Once,2007,Music,9437933 The Horse Boy,2009,Documentary,155984 The Texas Chain Saw 
Massacre,1974,Horror,30859000 Roger & Me,1989,Documentary,6706368 Facing the Giants,2006,Sport,10174663 The Gallows,2015,Horror,22757819 Hollywood Shuffle,1987,Comedy,5228617 The Lost Skeleton of Cadavra,2001,Horror,110536 Cheap Thrills,2013,Drama,59379 The Last House on the Left,2009,Thriller,32721635 Pi,1998,Thriller,3216970 20 Dates,1998,Comedy,536767 Super Size Me,2004,Comedy,11529368 The FP,2011,Comedy,40557 Happy Christmas,2014,Comedy,30084 The Brothers McMullen,1995,Drama,10246600 Tiny Furniture,2010,Romance,389804 George Washington,2000,Drama,241816 Smiling Fish & Goat on Fire,1999,Comedy,277233 Clerks,1994,Comedy,3151130 In the Company of Men,1997,Comedy,2856622 Sabotage,2014,Action,10499968 Slacker,1991,Drama,1227508 Clean,2004,Romance,136007 The Circle,2000,Drama,673780 Primer,2004,Thriller,424760 El Mariachi,1992,Romance,2040920 My Date with Drew,2004,Documentary,85222
================================================ FILE: R/inst/tutorials/02-statistics/stats.R ================================================
library(metaflow)

# The start step: load the movie data and list the genres found in it.
#
# self: the object each step function receives (presumably the flow state
#       provided by metaflow -- confirm against the package docs). The `df`
#       and `genres` values assigned here are read again by compute_stats.
start <- function(self) {
  # Loads the movie data into a data frame; text columns stay character so
  # genres can be compared as plain strings.
  self$df <- read.csv("./movies.csv", stringsAsFactors = FALSE)

  # Find all unique genres (sorted; missing genres are not listed).
  self$genres <- levels(as.factor(self$df$genre))
}

# Compute statistics for a single genre.
#
# self: same object as in `start`; `self$input` is used as the genre this
#       call works on. Stores `genre`, `df_by_genre`, `median` and `mean`.
compute_stats <- function(self) {
  self$genre <- self$input
  message("Computing statistics for ", self$genre)

  # Find all the movies that have this genre. which() keeps only the rows
  # where the comparison is TRUE; indexing with the raw logical vector would
  # insert an all-NA row for every record whose genre is missing.
  self$df_by_genre <- self$df[which(self$df$genre == self$genre), ]

  gross <- self$df_by_genre$gross

  # Get some statistics on the gross box office for these titles, ignoring
  # records with no gross figure instead of returning NA for the whole genre.
  self$median <- median(gross, na.rm = TRUE)
  self$mean <- mean(gross, na.rm = TRUE)
}

# Join our parallel branches and merge results into a data frame.
#
# self:   the object each step function receives; gets the combined `stats`
#         data frame.
# inputs: list-like collection with one entry per compute_stats branch; each
#         entry exposes that branch's `genre`, `median` and `mean`.
join <- function(self, inputs) {
  # vapply() requires exactly one value of the stated type from every branch,
  # so a branch with a missing or malformed statistic raises an error here
  # instead of silently producing columns of different lengths, which is what
  # unlist(lapply(...)) would do.
  self$stats <- data.frame(
    "genres" = vapply(inputs, function(inp) inp$genre, character(1)),
    "median" = vapply(inputs, function(inp) inp$median, numeric(1)),
    "mean" = vapply(inputs, function(inp) inp$mean, numeric(1)),
    # Keep genres as character on every R version, matching read.csv() in
    # the start step.
    stringsAsFactors = FALSE
  )

  print(head(self$stats))
}

# Flow definition: start fans out over `genres`, compute_stats runs once per
# genre, and join gathers the branches before the end step.
metaflow("MovieStatsFlow") %>%
  step(
    step = "start",
    r_function = start,
    next_step = "compute_stats",
    foreach = "genres"
  ) %>%
  step(
    step = "compute_stats",
    r_function = compute_stats,
    next_step = "join"
  ) %>%
  step(
    step = "join",
    r_function = join,
    next_step = "end",
    join = TRUE
  ) %>%
  step(step = "end") %>%
  run()
================================================ FILE: R/inst/tutorials/02-statistics/stats.Rmd ================================================
---
title: "Episode 02: Is this Data Science?"
output:
  html_document:
    df_print: paged
---

MovieStatsFlow loads the movie metadata CSV file into a data frame and computes some movie genre-specific statistics. You can use this notebook and the Metaflow client to eyeball the results and make some simple plots.

```{r}
suppressPackageStartupMessages(library(metaflow))
message("Current metadata provider: ", get_metadata())
message("Current namespace: ", get_namespace())
```

## Get the movie statistics from the latest run of MovieStatsFlow

```{r}
flow <- flow_client$new("MovieStatsFlow")
run_id <- flow$latest_successful_run
run <- run_client$new(flow, run_id)
df <- run$artifact("stats")
print(head(df))
```

## Create a bar plot of median gross box office of the top 5 genres

```{r}
df <- df[order(df$median, decreasing = TRUE), ]
print(head(df))
barplot(df$median[1:5], names.arg=df$genres[1:5])
```
================================================ FILE: R/inst/tutorials/03-playlist-redux/README.md ================================================
# Episode 03-playlist-redux: Follow the Money.
**Use Metaflow to load the statistics generated from 'Episode 02' and recommend movies from the genre with the highest median gross box office**

#### Showcasing:
- Using data artifacts generated from other flows.

#### Before playing this episode:
1. Run 'Episode 02-statistics: Is this Data Science?'
2. Configure your metadata provider to a user-wide global provider, if you haven't done so already.
```bash
$ mkdir -p /path/to/home/.metaflow
$ export METAFLOW_DEFAULT_METADATA=local
```

#### To play this episode:
In a terminal:
1. ```cd tutorials/03-playlist-redux```
2. ```Rscript playlist.R show```
3. ```Rscript playlist.R run```

If you are using RStudio, you can run this script by directly executing `source("playlist.R")`.

In this ```PlayListReduxFlow```, we reuse the per-genre median gross box office statistics computed by ```MovieStatsFlow```, pick the genre with the highest median gross box office, and create a randomized playlist of movies from that genre.
================================================ FILE: R/inst/tutorials/03-playlist-redux/movies.csv ================================================
movie_title,title_year,genre,gross Avatar,2009,Action,760505847 Pirates of the Caribbean: At World's End,2007,Fantasy,309404152 Spectre,2015,Thriller,200074175 The Dark Knight Rises,2012,Thriller,448130642 John Carter,2012,Action,73058679 Spider-Man 3,2007,Romance,336530303 Tangled,2010,Romance,200807262 Avengers: Age of Ultron,2015,Action,458991599 Harry Potter and the Half-Blood Prince,2009,Fantasy,301956980 Batman v Superman: Dawn of Justice,2016,Adventure,330249062 Superman Returns,2006,Adventure,200069408 Quantum of Solace,2008,Action,168368427 Pirates of the Caribbean: Dead Man's Chest,2006,Action,423032628 The Lone Ranger,2013,Adventure,89289910 Man of Steel,2013,Action,291021565 The Chronicles of Narnia: Prince Caspian,2008,Family,141614023 The Avengers,2012,Adventure,623279547 Pirates of the Caribbean: On Stranger Tides,2011,Action,241063875 Men in Black
3,2012,Sci-Fi,179020854 The Hobbit: The Battle of the Five Armies,2014,Adventure,255108370 The Amazing Spider-Man,2012,Fantasy,262030663 Robin Hood,2010,Drama,105219735 The Hobbit: The Desolation of Smaug,2013,Adventure,258355354 The Golden Compass,2007,Fantasy,70083519 King Kong,2005,Drama,218051260 Titanic,1997,Drama,658672302 Captain America: Civil War,2016,Adventure,407197282 Battleship,2012,Sci-Fi,65173160 Jurassic World,2015,Thriller,652177271 Skyfall,2012,Action,304360277 Spider-Man 2,2004,Romance,373377893 Iron Man 3,2013,Adventure,408992272 Alice in Wonderland,2010,Adventure,334185206 X-Men: The Last Stand,2006,Sci-Fi,234360014 Monsters University,2013,Fantasy,268488329 Transformers: Revenge of the Fallen,2009,Adventure,402076689 Transformers: Age of Extinction,2014,Sci-Fi,245428137 Oz the Great and Powerful,2013,Family,234903076 The Amazing Spider-Man 2,2014,Fantasy,202853933 TRON: Legacy,2010,Sci-Fi,172051787 Cars 2,2011,Comedy,191450875 Green Lantern,2011,Action,116593191 Toy Story 3,2010,Adventure,414984497 Terminator Salvation,2009,Action,125320003 Furious 7,2015,Crime,350034110 World War Z,2013,Thriller,202351611 X-Men: Days of Future Past,2014,Fantasy,233914986 Star Trek Into Darkness,2013,Adventure,228756232 Jack the Giant Slayer,2013,Fantasy,65171860 The Great Gatsby,2013,Drama,144812796 Prince of Persia: The Sands of Time,2010,Romance,90755643 Pacific Rim,2013,Action,101785482 Transformers: Dark of the Moon,2011,Sci-Fi,352358779 Indiana Jones and the Kingdom of the Crystal Skull,2008,Action,317011114 Brave,2012,Family,237282182 Star Trek Beyond,2016,Thriller,130468626 WALL·E,2008,Animation,223806889 Rush Hour 3,2007,Action,140080850 2012,2009,Action,166112167 A Christmas Carol,2009,Fantasy,137850096 Jupiter Ascending,2015,Sci-Fi,47375327 The Legend of Tarzan,2016,Romance,124051759 "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe",2005,Adventure,291709845 X-Men: Apocalypse,2016,Adventure,154985087 The Dark 
Knight,2008,Thriller,533316061 Up,2009,Family,292979556 Monsters vs. Aliens,2009,Action,198332128 Iron Man,2008,Action,318298180 Hugo,2011,Family,73820094 Wild Wild West,1999,Sci-Fi,113745408 The Mummy: Tomb of the Dragon Emperor,2008,Fantasy,102176165 Suicide Squad,2016,Adventure,161087183 Evan Almighty,2007,Family,100289690 Edge of Tomorrow,2014,Adventure,100189501 Waterworld,1995,Sci-Fi,88246220 G.I. Joe: The Rise of Cobra,2009,Sci-Fi,150167630 Inside Out,2015,Comedy,356454367 The Jungle Book,2016,Drama,362645141 Iron Man 2,2010,Sci-Fi,312057433 Snow White and the Huntsman,2012,Action,155111815 Maleficent,2014,Fantasy,241407328 Dawn of the Planet of the Apes,2014,Drama,208543795 47 Ronin,2013,Fantasy,38297305 Captain America: The Winter Soldier,2014,Action,259746958 Shrek Forever After,2010,Animation,238371987 Tomorrowland,2015,Action,93417865 Big Hero 6,2014,Adventure,222487711 Wreck-It Ralph,2012,Sci-Fi,189412677 The Polar Express,2004,Animation,665426 Independence Day: Resurgence,2016,Adventure,102315545 How to Train Your Dragon,2010,Adventure,217387997 Terminator 3: Rise of the Machines,2003,Action,150350192 Guardians of the Galaxy,2014,Adventure,333130696 Interstellar,2014,Drama,187991439 Inception,2010,Sci-Fi,292568851 The Fast and the Furious,2001,Crime,144512310 The Curious Case of Benjamin Button,2008,Drama,127490802 X-Men: First Class,2011,Sci-Fi,146405371 The Hunger Games: Mockingjay - Part 2,2015,Sci-Fi,281666058 The Sorcerer's Apprentice,2010,Adventure,63143812 Poseidon,2006,Action,60655503 Alice Through the Looking Glass,2016,Fantasy,76846624 Shrek the Third,2007,Comedy,320706665 Warcraft,2016,Fantasy,46978995 Terminator Genisys,2015,Adventure,89732035 The Chronicles of Narnia: The Voyage of the Dawn Treader,2010,Adventure,104383624 Pearl Harbor,2001,War,198539855 Transformers,2007,Action,318759914 Alexander,2004,Biography,34293771 Harry Potter and the Order of the Phoenix,2007,Family,292000866 Harry Potter and the Goblet of 
Fire,2005,Family,289994397 Hancock,2008,Action,227946274 I Am Legend,2007,Sci-Fi,256386216 Charlie and the Chocolate Factory,2005,Adventure,206456431 Ratatouille,2007,Comedy,206435493 Batman Begins,2005,Adventure,205343774 Madagascar: Escape 2 Africa,2008,Comedy,179982968 Night at the Museum: Battle of the Smithsonian,2009,Comedy,177243721 X-Men Origins: Wolverine,2009,Thriller,179883016 The Matrix Revolutions,2003,Action,139259759 Frozen,2013,Adventure,400736600 The Matrix Reloaded,2003,Action,281492479 Thor: The Dark World,2013,Adventure,206360018 Mad Max: Fury Road,2015,Action,153629485 Angels & Demons,2009,Mystery,133375846 Thor,2011,Fantasy,181015141 Bolt,2008,Comedy,114053579 G-Force,2009,Fantasy,119420252 Wrath of the Titans,2012,Adventure,83640426 Dark Shadows,2012,Horror,79711678 Mission: Impossible - Rogue Nation,2015,Thriller,195000874 The Wolfman,2010,Drama,61937495 The Legend of Tarzan,2016,Adventure,124051759 Bee Movie,2007,Family,126597121 Kung Fu Panda 2,2011,Action,165230261 The Last Airbender,2010,Action,131564731 Mission: Impossible III,2006,Adventure,133382309 White House Down,2013,Thriller,73103784 Mars Needs Moms,2011,Family,21379315 Flushed Away,2006,Family,64459316 Pan,2015,Adventure,34964818 Mr. 
Peabody & Sherman,2014,Adventure,111505642 Troy,2004,Adventure,133228348 Madagascar 3: Europe's Most Wanted,2012,Family,216366733 Die Another Day,2002,Thriller,160201106 Ghostbusters,2016,Action,118099659 Armageddon,1998,Sci-Fi,201573391 Men in Black II,2002,Action,190418803 Beowulf,2007,Adventure,82161969 Kung Fu Panda 3,2016,Comedy,143523463 Mission: Impossible - Ghost Protocol,2011,Action,209364921 Rise of the Guardians,2012,Fantasy,103400692 Fun with Dick and Jane,2005,Comedy,110332737 The Last Samurai,2003,Action,111110575 Exodus: Gods and Kings,2014,Drama,65007045 Star Trek,2009,Sci-Fi,257704099 Spider-Man,2002,Romance,403706375 How to Train Your Dragon 2,2014,Action,176997107 Gods of Egypt,2016,Action,31141074 Stealth,2005,Adventure,31704416 Watchmen,2009,Mystery,107503316 Lethal Weapon 4,1998,Thriller,129734803 Hulk,2003,Sci-Fi,132122995 G.I. Joe: Retaliation,2013,Thriller,122512052 Sahara,2005,Comedy,68642452 Final Fantasy: The Spirits Within,2001,Animation,32131830 Captain America: The First Avenger,2011,Adventure,176636816 The World Is Not Enough,1999,Adventure,126930660 Master and Commander: The Far Side of the World,2003,Adventure,93926386 The Twilight Saga: Breaking Dawn - Part 2,2012,Drama,292298923 Happy Feet 2,2011,Musical,63992328 The Incredible Hulk,2008,Adventure,134518390 The BFG,2016,Family,52792307 The Revenant,2015,Drama,183635922 Turbo,2013,Animation,83024900 Rango,2011,Adventure,123207194 Penguins of Madagascar,2014,Animation,83348920 The Bourne Ultimatum,2007,Thriller,227137090 Kung Fu Panda,2008,Animation,215395021 Ant-Man,2015,Action,180191634 The Hunger Games: Catching Fire,2013,Thriller,424645577 The Twilight Saga: Breaking Dawn - Part 2,2012,Adventure,292298923 Home,2015,Sci-Fi,177343675 War of the Worlds,2005,Adventure,234277056 Bad Boys II,2003,Crime,138396624 Puss in Boots,2011,Family,149234747 Salt,2010,Crime,118311368 Noah,2014,Adventure,101160529 The Adventures of Tintin,2011,Action,77564037 Harry Potter and the Prisoner of 
Azkaban,2004,Adventure,249358727 Australia,2008,Romance,49551662 After Earth,2013,Action,60522097 Dinosaur,2000,Animation,137748063 Night at the Museum: Secret of the Tomb,2014,Fantasy,113733726 Megamind,2010,Sci-Fi,148337537 Harry Potter and the Sorcerer's Stone,2001,Adventure,317557891 R.I.P.D.,2013,Comedy,33592415 Pirates of the Caribbean: The Curse of the Black Pearl,2003,Adventure,305388685 The Hunger Games: Mockingjay - Part 1,2014,Thriller,337103873 The Da Vinci Code,2006,Thriller,217536138 Rio 2,2014,Comedy,131536019 X-Men 2,2003,Thriller,214948780 Fast Five,2011,Crime,209805005 Sherlock Holmes: A Game of Shadows,2011,Action,186830669 Clash of the Titans,2010,Fantasy,163192114 Total Recall,1990,Sci-Fi,119412921 The 13th Warrior,1999,Adventure,32694788 The Bourne Legacy,2012,Action,113165635 Batman & Robin,1997,Action,107285004 How the Grinch Stole Christmas,2000,Fantasy,260031035 The Day After Tomorrow,2004,Sci-Fi,186739919 Mission: Impossible II,2000,Thriller,215397307 The Perfect Storm,2000,Action,182618434 Fantastic 4: Rise of the Silver Surfer,2007,Sci-Fi,131920333 Life of Pi,2012,Adventure,124976634 Ghost Rider,2007,Fantasy,115802596 Jason Bourne,2016,Thriller,108521835 Charlie's Angels: Full Throttle,2003,Action,100685880 Prometheus,2012,Sci-Fi,126464904 Stuart Little 2,2002,Comedy,64736114 Elysium,2013,Thriller,93050117 The Chronicles of Riddick,2004,Sci-Fi,57637485 RoboCop,2014,Crime,58607007 Speed Racer,2008,Action,43929341 How Do You Know,2010,Comedy,30212620 Knight and Day,2010,Comedy,76418654 Oblivion,2013,Adventure,89021735 Star Wars: Episode III - Revenge of the Sith,2005,Sci-Fi,380262555 Star Wars: Episode II - Attack of the Clones,2002,Fantasy,310675583 "Monsters, Inc.",2001,Family,289907418 The Wolverine,2013,Thriller,132550960 Star Wars: Episode I - The Phantom Menace,1999,Adventure,474544677 The Croods,2013,Comedy,187165546 Windtalkers,2002,War,40911830 The Huntsman: Winter's War,2016,Drama,47952020 Teenage Mutant Ninja 
Turtles,2014,Action,190871240 Gravity,2013,Drama,274084951 Dante's Peak,1997,Thriller,67155742 Fantastic Four,2015,Action,56114221 Night at the Museum,2006,Fantasy,250863268 San Andreas,2015,Action,155181732 Tomorrow Never Dies,1997,Adventure,125332007 The Patriot,2000,Drama,113330342 Ocean's Twelve,2004,Thriller,125531634 Mr. & Mrs. Smith,2005,Comedy,186336103 Insurgent,2015,Adventure,129995817 The Aviator,2004,Biography,102608827 Gulliver's Travels,2010,Fantasy,42776259 The Green Hornet,2011,Comedy,98780042 300: Rise of an Empire,2014,Fantasy,106369117 The Smurfs,2011,Fantasy,142614158 Home on the Range,2004,Family,50026353 Allegiant,2016,Adventure,66002193 Real Steel,2011,Action,85463309 The Smurfs 2,2013,Fantasy,71017784 Speed 2: Cruise Control,1997,Romance,48068396 Ender's Game,2013,Action,61656849 Live Free or Die Hard,2007,Adventure,134520804 The Lord of the Rings: The Fellowship of the Ring,2001,Action,313837577 Around the World in 80 Days,2004,Action,24004159 Ali,2001,Sport,58183966 The Cat in the Hat,2003,Family,100446895 "I, Robot",2004,Action,144795350 Kingdom of Heaven,2005,History,47396698 Stuart Little,1999,Adventure,140015224 The Princess and the Frog,2009,Family,104374107 The Martian,2015,Drama,228430993 The Island,2005,Thriller,35799026 Town & Country,2001,Comedy,6712451 Gone in Sixty Seconds,2000,Crime,101643008 Gladiator,2000,Drama,187670866 Minority Report,2002,Thriller,132014112 Harry Potter and the Chamber of Secrets,2002,Family,261970615 Casino Royale,2006,Thriller,167007184 Planet of the Apes,2001,Sci-Fi,180011740 Terminator 2: Judgment Day,1991,Action,204843350 Public Enemies,2009,Romance,97030725 American Gangster,2007,Drama,130127620 True Lies,1994,Action,146282411 The Taking of Pelham 1 2 3,2009,Action,65452312 Little Fockers,2010,Romance,148383780 The Other Guys,2010,Action,119219978 Eraser,1996,Action,101228120 Django Unchained,2012,Drama,162804648 The Hunchback of Notre Dame,1996,Romance,100117603 The Emperor's New 
Groove,2000,Adventure,89296573 The Expendables 2,2012,Thriller,85017401 National Treasure,2004,Comedy,173005002 Eragon,2006,Action,75030163 Where the Wild Things Are,2009,Drama,77222184 Pan,2015,Family,34964818 Epic,2013,Adventure,107515297 The Tourist,2010,Thriller,67631157 End of Days,1999,Action,66862068 Blood Diamond,2006,Adventure,57366262 The Wolf of Wall Street,2013,Comedy,116866727 Batman Forever,1995,Adventure,184031112 Starship Troopers,1997,Sci-Fi,54700065 Cloud Atlas,2012,Sci-Fi,27098580 Legend of the Guardians: The Owls of Ga'Hoole,2010,Adventure,55673333 Catwoman,2004,Fantasy,40198710 Hercules,2014,Adventure,72660029 Treasure Planet,2002,Animation,38120554 Land of the Lost,2009,Adventure,49392095 The Expendables 3,2014,Action,39292022 Point Break,2015,Action,28772222 Son of the Mask,2005,Family,17010646 In the Heart of the Sea,2015,Action,24985612 The Adventures of Pluto Nash,2002,Sci-Fi,4411102 Green Zone,2010,Thriller,35024475 The Peanuts Movie,2015,Adventure,130174897 The Spanish Prisoner,1997,Mystery,10200000 The Mummy Returns,2001,Fantasy,202007640 Gangs of New York,2002,Drama,77679638 The Flowers of War,2011,Drama,9213 Surf's Up,2007,Comedy,58867694 The Stepford Wives,2004,Comedy,59475623 Black Hawk Down,2001,War,108638745 The Campaign,2012,Comedy,86897182 The Fifth Element,1997,Adventure,63540020 Sex and the City 2,2010,Comedy,95328937 The Road to El Dorado,2000,Comedy,50802661 Ice Age: Continental Drift,2012,Adventure,161317423 Cinderella,2015,Romance,201148159 The Lovely Bones,2009,Drama,43982842 Finding Nemo,2003,Comedy,380838870 The Lord of the Rings: The Return of the King,2003,Drama,377019252 The Lord of the Rings: The Two Towers,2002,Action,340478898 Seventh Son,2014,Adventure,17176900 Lara Croft: Tomb Raider,2001,Thriller,131144183 Transcendence,2014,Thriller,23014504 Jurassic Park III,2001,Thriller,181166115 Rise of the Planet of the Apes,2011,Action,176740650 The Spiderwick Chronicles,2008,Family,71148699 A Good Day to Die 
Hard,2013,Thriller,67344392 The Alamo,2004,Western,22406362 The Incredibles,2004,Adventure,261437578 Cutthroat Island,1995,Adventure,11000000 Percy Jackson & the Olympians: The Lightning Thief,2010,Family,88761720 Men in Black,1997,Family,250147615 Toy Story 2,1999,Comedy,245823397 Unstoppable,2010,Thriller,81557479 Rush Hour 2,2001,Comedy,226138454 What Lies Beneath,2000,Fantasy,155370362 Cloudy with a Chance of Meatballs,2009,Family,124870275 Ice Age: Dawn of the Dinosaurs,2009,Family,196573705 The Secret Life of Walter Mitty,2013,Fantasy,58229120 Charlie's Angels,2000,Action,125305545 The Departed,2006,Crime,132373442 Mulan,1998,Fantasy,120618403 Tropic Thunder,2008,Action,110416702 The Girl with the Dragon Tattoo,2011,Drama,102515793 Die Hard with a Vengeance,1995,Adventure,100012500 Sherlock Holmes,2009,Adventure,209019489 Atlantis: The Lost Empire,2001,Action,84037039 Alvin and the Chipmunks: The Road Chip,2015,Animation,85884815 Valkyrie,2008,History,83077470 You Don't Mess with the Zohan,2008,Comedy,100018837 Pixels,2015,Animation,78747585 A.I. 
Artificial Intelligence,2001,Drama,78616689 The Haunted Mansion,2003,Comedy,75817994 Contact,1997,Drama,100853835 Hollow Man,2000,Action,73209340 The Interpreter,2005,Crime,72515360 Percy Jackson: Sea of Monsters,2013,Fantasy,68558662 Lara Croft Tomb Raider: The Cradle of Life,2003,Fantasy,65653758 Now You See Me 2,2016,Comedy,64685359 The Saint,1997,Action,61355436 Spy Game,2001,Thriller,26871 Mission to Mars,2000,Thriller,60874615 Rio,2011,Adventure,143618384 Bicentennial Man,1999,Comedy,58220776 Volcano,1997,Action,47474112 The Devil's Own,1997,Thriller,42877165 K-19: The Widowmaker,2002,History,35168677 Fantastic Four,2015,Sci-Fi,56114221 Conan the Barbarian,1982,Fantasy,37567440 Cinderella Man,2005,Drama,61644321 The Nutcracker in 3D,2010,Fantasy,190562 Seabiscuit,2003,History,120147445 Twister,1996,Adventure,241688385 The Fast and the Furious,2001,Thriller,144512310 Cast Away,2000,Adventure,233630478 Happy Feet,2006,Music,197992827 The Bourne Supremacy,2004,Mystery,176049130 Air Force One,1997,Drama,172620724 Ocean's Eleven,2001,Crime,183405771 The Three Musketeers,2011,Romance,20315324 Hotel Transylvania,2012,Animation,148313048 Enchanted,2007,Animation,127706877 Safe House,2012,Thriller,126149655 102 Dalmatians,2000,Adventure,66941559 Tower Heist,2011,Action,78009155 The Holiday,2006,Romance,63224849 Enemy of the State,1998,Drama,111544445 It's Complicated,2009,Drama,112703470 Ocean's Thirteen,2007,Crime,117144465 Open Season,2006,Animation,84303558 Divergent,2014,Mystery,150832203 Enemy at the Gates,2001,War,51396781 The Rundown,2003,Action,47592825 Last Action Hero,1993,Comedy,50016394 Memoirs of a Geisha,2005,Drama,57010853 The Fast and the Furious: Tokyo Drift,2006,Action,62494975 Arthur Christmas,2011,Fantasy,46440491 Meet Joe Black,1998,Drama,44606335 Collateral Damage,2002,Drama,40048332 Mirror Mirror,2012,Adventure,64933670 Scott Pilgrim vs. 
the World,2010,Romance,31494270 The Core,2003,Action,31111260 Nutty Professor II: The Klumps,2000,Sci-Fi,123307945 Scooby-Doo,2002,Comedy,153288182 Dredd,2012,Action,13401683 Click,2006,Comedy,137340146 Cats & Dogs: The Revenge of Kitty Galore,2010,Action,43575716 Jumper,2008,Adventure,80170146 Hellboy II: The Golden Army,2008,Sci-Fi,75754670 Zodiac,2007,Mystery,33048353 The 6th Day,2000,Sci-Fi,34543701 Bruce Almighty,2003,Comedy,242589580 The Expendables,2010,Action,102981571 Mission: Impossible,1996,Adventure,180965237 The Hunger Games,2012,Sci-Fi,407999255 The Hangover Part II,2011,Comedy,254455986 Batman Returns,1992,Action,162831698 Over the Hedge,2006,Animation,155019340 Lilo & Stitch,2002,Family,145771527 Deep Impact,1998,Thriller,140459099 RED 2,2013,Crime,53215979 The Longest Yard,2005,Sport,158115031 Alvin and the Chipmunks: Chipwrecked,2011,Animation,133103929 Grown Ups 2,2013,Comedy,133668525 Get Smart,2008,Comedy,130313314 Something's Gotta Give,2003,Comedy,124590960 Shutter Island,2010,Mystery,127968405 Four Christmases,2008,Comedy,120136047 Robots,2005,Adventure,128200012 Face/Off,1997,Thriller,112225777 Bedtime Stories,2008,Romance,109993847 Road to Perdition,2002,Crime,104054514 Just Go with It,2011,Comedy,103028109 Con Air,1997,Action,101087161 Eagle Eye,2008,Action,101111837 Cold Mountain,2003,History,95632614 The Book of Eli,2010,Thriller,94822707 Flubber,1997,Sci-Fi,92969824 The Haunting,1999,Mystery,91188905 Space Jam,1996,Fantasy,90443603 The Pink Panther,2006,Comedy,82226474 The Day the Earth Stood Still,2008,Sci-Fi,79363785 Conspiracy Theory,1997,Thriller,76081498 Fury,2014,War,85707116 Six Days Seven Nights,1998,Comedy,74329966 Yogi Bear,2010,Family,100169068 Spirit: Stallion of the Cimarron,2002,Animation,73215310 Zookeeper,2011,Family,80360866 Lost in Space,1998,Action,69102910 The Manchurian Candidate,2004,Mystery,65948711 Hotel Transylvania 2,2015,Animation,169692572 Fantasia 2000,1999,Music,60507228 The Time 
Machine,2002,Adventure,56684819 Mighty Joe Young,1998,Thriller,50628009 Swordfish,2001,Action,69772969 The Legend of Zorro,2005,Action,45356386 What Dreams May Come,1998,Romance,55350897 Little Nicky,2000,Fantasy,39442871 The Brothers Grimm,2005,Adventure,37899638 Mars Attacks!,1996,Sci-Fi,37754208 Surrogates,2009,Sci-Fi,38542418 Thirteen Days,2000,History,34566746 Daylight,1996,Thriller,32885565 Walking with Dinosaurs 3D,2013,Animation,36073232 Battlefield Earth,2000,Adventure,21471685 Looney Tunes: Back in Action,2003,Family,20950820 Nine,2009,Romance,19673424 Timeline,2003,Adventure,19480739 The Postman,1997,Adventure,17593391 Babe: Pig in the City,1998,Fantasy,18318000 The Last Witch Hunter,2015,Fantasy,27356090 Red Planet,2000,Action,17473245 Arthur and the Invisibles,2006,Animation,15131330 Oceans,2009,Documentary,19406406 A Sound of Thunder,2005,Horror,1891821 Pompeii,2014,History,23219748 A Beautiful Mind,2001,Drama,170708996 The Lion King,1994,Animation,422783777 Journey 2: The Mysterious Island,2012,Adventure,103812241 Cloudy with a Chance of Meatballs 2,2013,Fantasy,119793567 Red Dragon,2002,Drama,92930005 Hidalgo,2004,Western,67286731 Jack and Jill,2011,Comedy,74158157 2 Fast 2 Furious,2003,Crime,127083765 The Little Prince,2015,Family,1339152 The Invasion,2007,Thriller,15071514 The Adventures of Rocky & Bullwinkle,2000,Family,26000610 The Secret Life of Pets,2016,Family,323505540 The League of Extraordinary Gentlemen,2003,Adventure,66462600 Despicable Me 2,2013,Sci-Fi,368049635 Independence Day,1996,Adventure,306124059 The Lost World: Jurassic Park,1997,Sci-Fi,229074524 Madagascar,2005,Comedy,193136719 Children of Men,2006,Thriller,35286428 X-Men,2000,Adventure,157299717 Wanted,2008,Action,134568845 The Rock,1996,Action,134006721 Ice Age: The Meltdown,2006,Action,195329763 50 First Dates,2004,Comedy,120776832 Hairspray,2007,Drama,118823091 Exorcist: The Beginning,2004,Mystery,41814863 Inspector Gadget,1999,Action,97360069 Now You See 
Me,2013,Thriller,117698894 Grown Ups,2010,Comedy,162001186 The Terminal,2004,Comedy,77032279 Hotel for Dogs,2009,Family,73023275 Vertical Limit,2000,Action,68473360 Charlie Wilson's War,2007,Comedy,66636385 Shark Tale,2004,Comedy,160762022 Dreamgirls,2006,Musical,103338338 Be Cool,2005,Crime,55808744 Munich,2005,Thriller,47379090 Tears of the Sun,2003,Action,43426961 Killers,2010,Comedy,47000485 The Man from U.N.C.L.E.,2015,Adventure,45434443 Spanglish,2004,Drama,42044321 Monster House,2006,Mystery,73661010 Bandits,2001,Comedy,41523271 First Knight,1995,Action,37600435 Anna and the King,1999,Drama,39251128 Immortals,2011,Drama,83503161 Hostage,2005,Action,34636443 Titan A.E.,2000,Adventure,22751979 Hollywood Homicide,2003,Thriller,30013346 Soldier,1998,Drama,14567883 Monkeybone,2001,Animation,5409517 Flight of the Phoenix,2004,Thriller,21009180 Unbreakable,2000,Drama,94999143 Minions,2015,Comedy,336029560 Sucker Punch,2011,Action,36381716 Snake Eyes,1998,Thriller,55585389 Sphere,1998,Drama,36976367 The Angry Birds Movie,2016,Comedy,107225164 Fool's Gold,2008,Adventure,70224196 Funny People,2009,Comedy,51814190 The Kingdom,2007,Thriller,47456450 Talladega Nights: The Ballad of Ricky Bobby,2006,Action,148213377 Dr. 
Dolittle 2,2001,Comedy,112950721 Braveheart,1995,History,75600000 Jarhead,2005,Action,62647540 The Simpsons Movie,2007,Comedy,183132370 The Majestic,2001,Drama,27796042 Driven,2001,Drama,32616869 Two Brothers,2004,Family,18947630 The Village,2004,Drama,114195633 Doctor Dolittle,1998,Comedy,144156464 Signs,2002,Sci-Fi,227965690 Shrek 2,2004,Comedy,436471036 Cars,2006,Comedy,244052771 Runaway Bride,1999,Romance,152149590 xXx,2002,Action,141204016 The SpongeBob Movie: Sponge Out of Water,2015,Family,162495848 Ransom,1996,Crime,136448821 Inglourious Basterds,2009,War,120523073 Hook,1991,Comedy,119654900 Hercules,2014,Adventure,72660029 Die Hard 2,1990,Action,117541000 S.W.A.T.,2003,Thriller,116643346 Vanilla Sky,2001,Thriller,100614858 Lady in the Water,2006,Mystery,42272747 AVP: Alien vs. Predator,2004,Thriller,80281096 Alvin and the Chipmunks: The Squeakquel,2009,Music,219613391 We Were Soldiers,2002,Action,78120196 Olympus Has Fallen,2013,Action,98895417 Star Trek: Insurrection,1998,Adventure,70117571 Battle Los Angeles,2011,Sci-Fi,83552429 Big Fish,2003,Drama,66257002 Wolf,1994,Horror,65012000 War Horse,2011,Drama,79883359 The Monuments Men,2014,War,78031620 The Abyss,1989,Thriller,54222000 Wall Street: Money Never Sleeps,2010,Drama,52474616 Dracula Untold,2014,Fantasy,55942830 The Siege,1998,Thriller,40932372 Stardust,2007,Romance,38345403 Seven Years in Tibet,1997,Drama,37901509 The Dilemma,2011,Drama,48430355 Bad Company,2002,Adventure,30157016 Doom,2005,Sci-Fi,28031250 I Spy,2002,Thriller,33105600 Underworld: Awakening,2012,Action,62321039 Rock of Ages,2012,Musical,38509342 Hart's War,2002,Drama,19076815 Killer Elite,2011,Thriller,25093607 Rollerball,2002,Sci-Fi,18990542 Ballistic: Ecks vs. 
Sever,2002,Crime,14294842 Hard Rain,1998,Drama,19819494 Osmosis Jones,2001,Adventure,13596911 Blackhat,2015,Action,7097125 Sky Captain and the World of Tomorrow,2004,Thriller,37760080 Basic Instinct 2,2006,Mystery,5851188 Escape Plan,2013,Crime,25121291 The Legend of Hercules,2014,Fantasy,18821279 The Sum of All Fears,2002,Drama,118471320 The Twilight Saga: Eclipse,2010,Fantasy,300523113 The Score,2001,Thriller,71069884 Despicable Me,2010,Family,251501645 Money Train,1995,Comedy,35324232 Ted 2,2015,Comedy,81257500 Agora,2009,History,617840 Mystery Men,1999,Fantasy,29655590 Hall Pass,2011,Comedy,45045037 The Insider,1999,Thriller,28965197 Body of Lies,2008,Drama,39380442 Abraham Lincoln: Vampire Hunter,2012,Horror,37516013 Entrapment,1999,Crime,87704396 The X Files,1998,Sci-Fi,83892374 The Last Legion,2007,Action,5932060 Saving Private Ryan,1998,Action,216119491 Need for Speed,2014,Crime,43568507 What Women Want,2000,Comedy,182805123 Ice Age,2002,Adventure,176387405 Dreamcatcher,2003,Drama,33685268 Lincoln,2012,War,182204440 The Matrix,1999,Action,171383253 Apollo 13,1995,Adventure,172071312 Total Recall,1990,Action,119412921 The Santa Clause 2,2002,Fantasy,139225854 Les Misérables,2012,Musical,148775460 You've Got Mail,1998,Romance,115731542 Step Brothers,2008,Comedy,100468793 The Mask of Zorro,1998,Adventure,93771072 Due Date,2010,Drama,100448498 Unbroken,2014,Sport,115603980 Space Cowboys,2000,Action,90454043 Cliffhanger,1993,Action,84049211 Broken Arrow,1996,Thriller,70450000 The Kid,2000,Family,69688384 World Trade Center,2006,History,70236496 Mona Lisa Smile,2003,Drama,63695760 The Dictator,2012,Romance,59617068 Eyes Wide Shut,1999,Mystery,55637680 Annie,2014,Comedy,85911262 Focus,2015,Crime,53846915 This Means War,2012,Comedy,54758461 Blade: Trinity,2004,Sci-Fi,52397389 Primary Colors,1998,Drama,38966057 Resident Evil: Retribution,2012,Action,42345531 Death Race,2008,Sci-Fi,36064910 The Long Kiss Goodnight,1996,Action,33328051 Proof of 
Life,2000,Drama,32598931 Zathura: A Space Adventure,2005,Adventure,28045540 Fight Club,1999,Drama,37023395 We Are Marshall,2006,Drama,43532294 Hudson Hawk,1991,Action,17218080 Lucky Numbers,2000,Crime,10014234 "I, Frankenstein",2014,Sci-Fi,19059018 Oliver Twist,2005,Drama,1987287 Elektra,2005,Action,24407944 Sin City: A Dame to Kill For,2014,Crime,13750556 Random Hearts,1999,Drama,31054924 Everest,2015,Biography,43247140 Perfume: The Story of a Murderer,2006,Fantasy,2208939 Austin Powers in Goldmember,2002,Comedy,213079163 Astro Boy,2009,Family,19548064 Jurassic Park,1993,Thriller,356784000 Wyatt Earp,1994,Biography,25052000 Clear and Present Danger,1994,Action,122012710 Dragon Blade,2015,Action,72413 Littleman,2006,Crime,58255287 U-571,2000,Action,77086030 The American President,1995,Comedy,65000000 The Love Guru,2008,Sport,32178777 3000 Miles to Graceland,2001,Comedy,15738632 The Hateful Eight,2015,Mystery,54116191 Blades of Glory,2007,Comedy,118153533 Hop,2011,Adventure,108012170 300,2006,Fantasy,210592590 Meet the Fockers,2004,Comedy,279167575 Marley & Me,2008,Comedy,143151473 The Green Mile,1999,Mystery,136801374 Chicken Little,2005,Animation,135381507 Gone Girl,2014,Mystery,167735396 The Bourne Identity,2002,Thriller,121468960 GoldenEye,1995,Adventure,106635996 The General's Daughter,1999,Thriller,102678089 The Truman Show,1998,Sci-Fi,125603360 The Prince of Egypt,1998,Fantasy,101217900 Daddy Day Care,2003,Comedy,104148781 2 Guns,2013,Comedy,75573300 Cats & Dogs,2001,Fantasy,93375151 The Italian Job,2003,Action,106126012 Two Weeks Notice,2002,Comedy,93307796 Antz,1998,Comedy,90646554 Couples Retreat,2009,Comedy,109176215 Days of Thunder,1990,Action,82670733 Cheaper by the Dozen 2,2005,Family,82569532 The Scorch Trials,2015,Sci-Fi,81687587 Eat Pray Love,2010,Drama,80574010 The Family Man,2000,Comedy,75764085 RED,2010,Action,90356857 Any Given Sunday,1999,Drama,75530832 The Horse Whisperer,1998,Romance,75370763 Collateral,2004,Thriller,100003492 The Scorpion 
King,2002,Action,90341670 Ladder 49,2004,Thriller,74540762 Jack Reacher,2012,Action,80033643 Deep Blue Sea,1999,Sci-Fi,73648142 This Is It,2009,Documentary,71844424 Contagion,2011,Thriller,75638743 Kangaroo Jack,2003,Comedy,66734992 Coraline,2009,Family,75280058 The Happening,2008,Thriller,64505912 Man on Fire,2004,Thriller,77862546 The Shaggy Dog,2006,Family,61112916 Starsky & Hutch,2004,Comedy,88200225 Jingle All the Way,1996,Family,60573641 Hellboy,2004,Sci-Fi,59035104 A Civil Action,1998,Drama,56702901 ParaNorman,2012,Family,55994557 The Jackal,1997,Crime,54910560 Paycheck,2003,Action,53789313 Up Close & Personal,1996,Romance,51045801 The Tale of Despereaux,2008,Animation,50818750 The Tuxedo,2002,Comedy,50189179 Under Siege 2: Dark Territory,1995,Action,50024083 Jack Ryan: Shadow Recruit,2014,Drama,50549107 Joy,2015,Comedy,56443482 London Has Fallen,2016,Drama,62401264 Alien: Resurrection,1997,Horror,47748610 Shooter,2007,Action,46975183 The Boxtrolls,2014,Family,50807639 Practical Magic,1998,Fantasy,46611204 The Lego Movie,2014,Adventure,257756197 Miss Congeniality 2: Armed and Fabulous,2005,Crime,48472213 Reign of Fire,2002,Action,43060566 Gangster Squad,2013,Drama,45996718 Year One,2009,Adventure,43337279 Invictus,2009,Drama,37479778 Duplicity,2009,Romance,40559930 My Favorite Martian,1999,Comedy,36830057 The Sentinel,2006,Thriller,36279230 Planet 51,2009,Adventure,42194060 Star Trek: Nemesis,2002,Sci-Fi,43119879 Intolerable Cruelty,2003,Romance,35096190 Edge of Darkness,2010,Mystery,43290977 The Relic,1997,Sci-Fi,33927476 Analyze That,2002,Comedy,32122249 Righteous Kill,2008,Action,40076438 Mercury Rising,1998,Action,32940507 The Soloist,2009,Biography,31670931 The Legend of Bagger Vance,2000,Fantasy,30695227 Almost Famous,2000,Music,32522352 xXx: State of the Union,2005,Crime,26082914 Priest,2011,Thriller,29136626 Sinbad: Legend of the Seven Seas,2003,Adventure,26288320 Event Horizon,1997,Horror,26616590 The Avengers,2012,Sci-Fi,623279547 
Dragonfly,2002,Fantasy,30063805 The Black Dahlia,2006,Crime,22518325 Flyboys,2006,Adventure,13082288 The Last Castle,2001,Thriller,18208078 Supernova,2000,Thriller,14218868 Winter's Tale,2014,Drama,22451 The Mortal Instruments: City of Bones,2013,Mystery,31165421 Meet Dave,2008,Romance,11802056 Dark Water,2005,Horror,25472967 Edtv,1999,Drama,22362500 Inkheart,2008,Fantasy,17281832 The Spirit,2008,Crime,19781879 Mortdecai,2015,Mystery,7605668 In the Name of the King: A Dungeon Siege Tale,2007,Action,4535117 Beyond Borders,2003,Romance,4426297 The Great Raid,2005,Drama,10166502 Deadpool,2016,Adventure,363024263 Holy Man,1998,Drama,12065985 American Sniper,2014,Biography,350123553 Goosebumps,2015,Adventure,80021740 Just Like Heaven,2005,Romance,48291624 The Flintstones in Viva Rock Vegas,2000,Sci-Fi,35231365 Rambo III,1988,Action,53715611 Leatherheads,2008,Sport,31199215 Did You Hear About the Morgans?,2009,Comedy,29580087 The Internship,2013,Comedy,44665963 Resident Evil: Afterlife,2010,Action,60128566 Red Tails,2012,History,49875589 The Devil's Advocate,1997,Mystery,60984028 That's My Boy,2012,Comedy,36931089 DragonHeart,1996,Action,51317350 After the Sunset,2004,Drama,28328132 Ghost Rider: Spirit of Vengeance,2011,Thriller,51774002 Captain Corelli's Mandolin,2001,War,25528495 The Pacifier,2005,Family,113006880 Walking Tall,2004,Crime,45860039 Forrest Gump,1994,Comedy,329691196 Alvin and the Chipmunks,2007,Family,217326336 Meet the Parents,2000,Comedy,166225040 Pocahontas,1995,Romance,141600000 Superman,1978,Action,134218018 The Nutty Professor,1996,Comedy,128769345 Hitch,2005,Comedy,177575142 George of the Jungle,1997,Action,105263257 American Wedding,2003,Romance,104354205 Captain Phillips,2013,Thriller,107100855 Date Night,2010,Romance,98711404 Casper,1995,Comedy,100328194 The Equalizer,2014,Action,101530738 Maid in Manhattan,2002,Drama,93815117 Crimson Tide,1995,Drama,91400000 The Pursuit of Happyness,2006,Drama,162586036 Flightplan,2005,Drama,89706988 
Disclosure,1994,Thriller,83000000 City of Angels,1998,Romance,78745923 Kill Bill: Vol. 1,2003,Action,70098138 Bowfinger,1999,Comedy,66365290 Kill Bill: Vol. 2,2004,Crime,66207920 Tango & Cash,1989,Thriller,63408614 Death Becomes Her,1992,Fantasy,58422650 Shanghai Noon,2000,Adventure,56932305 Executive Decision,1996,Adventure,68750000 Mr. Popper's Penguins,2011,Family,68218041 The Forbidden Kingdom,2008,Fantasy,25040293 Free Birds,2013,Animation,55747724 Alien 3,1992,Sci-Fi,55473600 Evita,1996,Biography,49994804 Ronin,1998,Thriller,41609593 The Ghost and the Darkness,1996,Adventure,38553833 Paddington,2014,Fantasy,76137505 The Watch,2012,Sci-Fi,34350553 The Hunted,2003,Drama,34238611 Instinct,1999,Thriller,34098563 Stuck on You,2003,Comedy,33828318 Semi-Pro,2008,Sport,33472850 The Pirates! Band of Misfits,2012,Animation,31051126 Changeling,2008,Mystery,35707327 Chain Reaction,1996,Action,20550712 The Fan,1996,Drama,18573791 The Phantom of the Opera,2004,Musical,51225796 Elizabeth: The Golden Age,2007,Drama,16264475 Æon Flux,2005,Sci-Fi,25857987 Gods and Generals,2003,History,12870569 Turbulence,1997,Thriller,11466088 Imagine That,2009,Family,16088610 Muppets Most Wanted,2014,Family,51178893 Thunderbirds,2004,Sci-Fi,6768055 Burlesque,2010,Music,39440655 A Very Long Engagement,2004,Romance,6167817 Blade II,2002,Action,81645152 Seven Pounds,2008,Drama,69951824 Bullet to the Head,2012,Action,9483821 The Godfather: Part III,1990,Drama,66676062 Elizabethtown,2005,Comedy,26838389 "You, Me and Dupree",2006,Comedy,75604320 Superman II,1980,Romance,108200000 Gigli,2003,Comedy,5660084 All the King's Men,2006,Drama,7221458 Shaft,2000,Thriller,70327868 Anastasia,1997,Fantasy,58297830 Moulin Rouge!,2001,Musical,57386369 Domestic Disturbance,2001,Thriller,45207112 Black Mass,2015,Crime,62563543 Flags of Our Fathers,2006,Drama,33574332 Law Abiding Citizen,2009,Crime,73343413 Grindhouse,2007,Horror,25031037 Beloved,1998,Drama,22843047 Lucky You,2007,Drama,5755286 Catch Me If You 
Can,2002,Biography,164435221 Zero Dark Thirty,2012,Drama,95720716 The Break-Up,2006,Drama,118683135 Mamma Mia!,2008,Musical,143704210 Valentine's Day,2010,Comedy,110476776 The Dukes of Hazzard,2005,Action,80270227 The Thin Red Line,1998,Drama,36385763 The Change-Up,2011,Fantasy,37035845 Man on the Moon,1999,Drama,34580635 Casino,1995,Biography,42438300 From Paris with Love,2010,Thriller,23324666 Bulletproof Monk,2003,Action,23020488 "Me, Myself & Irene",2000,Comedy,90567722 Barnyard,2006,Animation,72601713 The Twilight Saga: New Moon,2009,Fantasy,296623634 Shrek,2001,Adventure,267652016 The Adjustment Bureau,2011,Romance,62453315 Robin Hood: Prince of Thieves,1991,Romance,165500000 Jerry Maguire,1996,Sport,153620822 Ted,2012,Fantasy,218628680 As Good as It Gets,1997,Comedy,147637474 Patch Adams,1998,Drama,135014968 Anchorman 2: The Legend Continues,2013,Comedy,2175312 Mr. Deeds,2002,Comedy,126203320 Super 8,2011,Sci-Fi,126975169 Erin Brockovich,2000,Drama,125548685 How to Lose a Guy in 10 Days,2003,Romance,105807520 22 Jump Street,2014,Crime,191616238 Interview with the Vampire: The Vampire Chronicles,1994,Horror,105264608 Yes Man,2008,Comedy,97680195 Central Intelligence,2016,Comedy,126088877 Stepmom,1998,Comedy,91030827 Daddy's Home,2015,Family,150315155 Into the Woods,2014,Adventure,127997349 Inside Man,2006,Mystery,88504640 Payback,1999,Drama,81517441 Congo,1995,Mystery,81022333 Knowing,2009,Thriller,79948113 Failure to Launch,2006,Comedy,88658172 "Crazy, Stupid, Love.",2011,Romance,84244877 Garfield,2004,Comedy,75367693 Christmas with the Kranks,2004,Family,73701902 Moneyball,2011,Biography,75605492 Outbreak,1995,Thriller,67823573 Non-Stop,2014,Mystery,91439400 Race to Witch Mountain,2009,Thriller,67128202 V for Vendetta,2005,Action,70496802 Shanghai Knights,2003,Action,60470220 Curious George,2006,Adventure,58336565 Herbie Fully Loaded,2005,Sport,66002004 Don't Say a Word,2001,Crime,54997476 Hansel & Gretel: Witch Hunters,2013,Horror,55682070 
Unfaithful,2002,Thriller,52752475 I Am Number Four,2011,Action,55092830 Syriana,2005,Drama,50815288 13 Hours,2016,Drama,52822418 The Book of Life,2014,Family,50150619 Firewall,2006,Crime,48745150 Absolute Power,1997,Thriller,50007168 G.I. Jane,1997,Action,48154732 The Game,1997,Thriller,48265581 Silent Hill,2006,Mystery,46982632 The Replacements,2000,Comedy,44737059 American Reunion,2012,Comedy,56724080 The Negotiator,1998,Mystery,44484065 Into the Storm,2014,Action,47553512 Beverly Hills Cop III,1994,Thriller,42610000 Gremlins 2: The New Batch,1990,Horror,41482207 The Judge,2014,Crime,47105085 The Peacemaker,1997,Thriller,41256277 Resident Evil: Apocalypse,2004,Sci-Fi,50740078 Bridget Jones: The Edge of Reason,2004,Comedy,40203020 Out of Time,2003,Thriller,40905277 On Deadly Ground,1994,Thriller,38590500 The Adventures of Sharkboy and Lavagirl 3-D,2005,Adventure,39177541 The Beach,2000,Drama,39778599 Raising Helen,2004,Drama,37486138 Ninja Assassin,2009,Action,38105077 For Love of the Game,1999,Sport,35168395 Striptease,1996,Thriller,32800000 Marmaduke,2010,Comedy,33643461 Hereafter,2010,Drama,32741596 Murder by Numbers,2002,Crime,31874869 Assassins,1995,Crime,30306268 Hannibal Rising,2007,Drama,27667947 The Story of Us,1999,Romance,27067160 The Host,2013,Action,26616999 Basic,2003,Thriller,26536120 Blood Work,2002,Drama,26199517 The International,2009,Drama,25450527 Escape from L.A.,1996,Adventure,25407250 The Iron Giant,1999,Comedy,23159305 The Life Aquatic with Steve Zissou,2004,Drama,24006726 Free State of Jones,2016,Biography,20389967 The Life of David Gale,2003,Thriller,19593740 Man of the House,2005,Comedy,19118247 Run All Night,2015,Action,26442251 Eastern Promises,2007,Mystery,17114882 Into the Blue,2005,Thriller,18472363 The Messenger: The Story of Joan of Arc,1999,History,14131298 Your Highness,2011,Fantasy,21557240 Dream House,2011,Drama,21283440 Mad City,1997,Drama,10556196 Baby's Day Out,1994,Crime,16671505 The Scarlet Letter,1995,Romance,10400000 
Fair Game,2010,Biography,9528092 Domino,2005,Action,10137232 Jade,1995,Drama,9795017 Gamer,2009,Thriller,20488579 Beautiful Creatures,2013,Romance,19445217 Death to Smoochy,2002,Comedy,8355815 Zoolander 2,2016,Comedy,28837115 The Big Bounce,2004,Comedy,6471394 What Planet Are You From?,2000,Sci-Fi,6291602 Drive Angry,2011,Thriller,10706786 Street Fighter: The Legend of Chun-Li,2009,Crime,8742261 The One,2001,Action,43905746 The Adventures of Ford Fairlane,1990,Action,21413502 Traffic,2000,Thriller,124107476 Indiana Jones and the Last Crusade,1989,Action,197171806 Chappie,2015,Action,31569268 The Bone Collector,1999,Mystery,66488090 Panic Room,2002,Drama,95308367 Three Kings,1999,Adventure,60652036 Child 44,2015,Thriller,1206135 Rat Race,2001,Adventure,56607223 K-PAX,2001,Drama,50173190 Kate & Leopold,2001,Comedy,47095453 Bedazzled,2000,Romance,37879996 The Cotton Club,1984,Drama,25900000 3:10 to Yuma,2007,Adventure,53574088 Taken 3,2014,Action,89253340 Out of Sight,1998,Thriller,37339525 The Cable Guy,1996,Comedy,60154431 Dick Tracy,1990,Crime,103738726 The Thomas Crown Affair,1999,Crime,69304264 Riding in Cars with Boys,2001,Comedy,29781453 Happily N'Ever After,2006,Adventure,15519841 Mary Reilly,1996,Drama,5600000 My Best Friend's Wedding,1997,Comedy,126805112 America's Sweethearts,2001,Romance,93607673 Insomnia,2002,Thriller,67263182 Star Trek: First Contact,1996,Sci-Fi,92001027 Jonah Hex,2010,Fantasy,10539414 Courage Under Fire,1996,Action,58918501 Liar Liar,1997,Comedy,181395380 The Flintstones,1994,Comedy,130512915 Taken 2,2012,Thriller,139852971 Scary Movie 3,2003,Comedy,110000082 Miss Congeniality,2000,Romance,106807667 Journey to the Center of the Earth,2008,Adventure,101702060 The Princess Diaries 2: Royal Engagement,2004,Family,95149435 The Pelican Brief,1993,Mystery,100768056 The Client,1994,Drama,92115211 The Bucket List,2007,Drama,93452056 Patriot Games,1992,Thriller,83287363 Monster-in-Law,2005,Romance,82931301 Prisoners,2013,Mystery,60962878 
Training Day,2001,Thriller,76261036 Galaxy Quest,1999,Sci-Fi,71423726 Scary Movie 2,2001,Comedy,71277420 The Muppets,2011,Musical,88625922 Blade,1998,Horror,70001065 Coach Carter,2005,Drama,67253092 Changing Lanes,2002,Drama,66790248 Anaconda,1997,Adventure,65557989 Coyote Ugly,2000,Drama,60786269 Love Actually,2003,Drama,59365105 A Bug's Life,1998,Fantasy,162792677 From Hell,2001,Thriller,31598308 The Specialist,1994,Crime,57362581 Tin Cup,1996,Comedy,53854588 Kicking & Screaming,2005,Romance,52580895 The Hitchhiker's Guide to the Galaxy,2005,Adventure,51019112 Fat Albert,2004,Romance,48114556 Resident Evil: Extinction,2007,Horror,50648679 Blended,2014,Comedy,46280507 Last Holiday,2006,Adventure,38360195 The River Wild,1994,Crime,46815748 The Indian in the Cupboard,1995,Drama,35617599 Savages,2012,Drama,47307550 Cellular,2004,Crime,32003620 Johnny English,2003,Adventure,27972410 The Ant Bully,2006,Family,28133159 Dune,1984,Adventure,27400000 Across the Universe,2007,Drama,24343673 Revolutionary Road,2008,Drama,22877808 16 Blocks,2006,Drama,36883539 Babylon A.D.,2008,Sci-Fi,22531698 The Glimmer Man,1996,Comedy,20400913 Multiplicity,1996,Sci-Fi,20101861 Aliens in the Attic,2009,Sci-Fi,25200412 The Pledge,2001,Mystery,19719930 The Producers,2005,Musical,19377727 Dredd,2012,Action,13401683 The Phantom,1996,Comedy,17300889 All the Pretty Horses,2000,Western,15527125 Nixon,1995,Drama,13560960 The Ghost Writer,2010,Mystery,15523168 Deep Rising,1998,Horror,11146409 Miracle at St. 
Anna,2008,War,7916887 Curse of the Golden Flower,2006,Drama,6565495 Bangkok Dangerous,2008,Crime,15279680 Big Trouble,2002,Crime,7262288 Love in the Time of Cholera,2007,Romance,4584886 Shadow Conspiracy,1997,Thriller,2154540 Johnny English Reborn,2011,Crime,8129455 Argo,2012,Biography,136019448 The Fugitive,1993,Thriller,183875760 The Bounty Hunter,2010,Action,67061228 Sleepers,1996,Crime,53300852 Rambo: First Blood Part II,1985,Action,150415432 The Juror,1996,Thriller,44834712 Pinocchio,1940,Fantasy,84300000 Heaven's Gate,1980,Western,1500000 Underworld: Evolution,2006,Fantasy,62318875 Victor Frankenstein,2015,Thriller,5773519 Finding Forrester,2000,Drama,51768623 28 Days,2000,Comedy,37035515 Unleashed,2005,Drama,24520892 The Sweetest Thing,2002,Romance,24430272 The Firm,1993,Thriller,158348400 Charlie St. Cloud,2010,Fantasy,31136950 The Mechanic,2011,Crime,29113588 21 Jump Street,2012,Action,138447667 Notting Hill,1999,Drama,116006080 Chicken Run,2000,Animation,106793915 Along Came Polly,2004,Comedy,87856565 Boomerang,1992,Drama,70100000 The Heat,2013,Crime,159578352 Cleopatra,1963,Drama,57750000 Here Comes the Boom,2012,Sport,45290318 High Crimes,2002,Mystery,41543207 The Mirror Has Two Faces,1996,Drama,41252428 The Mothman Prophecies,2002,Horror,35228696 Brüno,2009,Comedy,59992760 Licence to Kill,1989,Thriller,34667015 Red Riding Hood,2011,Horror,37652565 15 Minutes,2001,Crime,24375436 Super Mario Bros.,1993,Fantasy,20915465 Lord of War,2005,Thriller,24127895 Hero,2002,Adventure,84961 One for the Money,2012,Comedy,26404753 The Interview,2014,Comedy,6105175 The Warrior's Way,2010,Action,5664251 Micmacs,2009,Action,1260917 8 Mile,2002,Music,116724075 A Knight's Tale,2001,Action,56083966 The Medallion,2003,Action,22108977 The Sixth Sense,1999,Mystery,293501675 Man on a Ledge,2012,Thriller,18600911 The Big Year,2011,Comedy,7204138 The Karate Kid,1984,Action,90800000 American Hustle,2013,Crime,150117807 The Proposal,2009,Drama,163947053 Double 
Jeopardy,1999,Crime,116735231 Back to the Future Part II,1989,Sci-Fi,118500000 Lucy,2014,Thriller,126546825 Fifty Shades of Grey,2015,Drama,166147885 Spy Kids 3-D: Game Over,2003,Family,111760631 A Time to Kill,1996,Drama,108706165 Cheaper by the Dozen,2003,Comedy,138614544 Lone Survivor,2013,Action,125069696 A League of Their Own,1992,Drama,107458785 The Conjuring 2,2016,Mystery,102310175 The Social Network,2010,Drama,96917897 He's Just Not That Into You,2009,Drama,93952276 Scary Movie 4,2006,Comedy,90703745 Scream 3,2000,Horror,89138076 Back to the Future Part III,1990,Western,87666629 Get Hard,2015,Comedy,90353764 Bram Stoker's Dracula,1992,Horror,82522790 Julie & Julia,2009,Biography,94125426 42,2013,Drama,95001343 The Talented Mr. Ripley,1999,Thriller,81292135 Dumb and Dumber To,2014,Comedy,86208010 Eight Below,2006,Adventure,81593527 The Intern,2015,Drama,75274748 Ride Along 2,2016,Comedy,90835030 The Last of the Mohicans,1992,Drama,72455275 Ray,2004,Drama,75305995 Sin City,2005,Crime,74098862 Vantage Point,2008,Thriller,72266306 "I Love You, Man",2009,Romance,71347010 Shallow Hal,2001,Romance,70836296 JFK,1991,History,70405498 Big Momma's House 2,2006,Comedy,70163652 The Mexican,2001,Adventure,66808615 Unbroken,2014,War,115603980 17 Again,2009,Fantasy,64149837 The Other Woman,2014,Comedy,83906114 The Final Destination,2009,Horror,66466372 Bridge of Spies,2015,Thriller,72306065 Behind Enemy Lines,2001,Drama,59068786 Shall We Dance,2004,Romance,57887882 Small Soldiers,1998,Comedy,53955614 Spawn,1997,Action,54967359 The Count of Monte Cristo,2002,Adventure,54228104 The Lincoln Lawyer,2011,Drama,57981889 Unknown,2011,Action,61094903 The Prestige,2006,Mystery,53082743 Horrible Bosses 2,2014,Comedy,54414716 Escape from Planet Earth,2013,Adventure,57011847 Apocalypto,2006,Thriller,50859889 The Living Daylights,1987,Action,51185897 Predators,2010,Action,52000688 Legal Eagles,1986,Romance,49851591 Secret Window,2004,Mystery,47781388 The Lake House,2006,Drama,52320979 
The Skeleton Key,2005,Thriller,47806295 The Odd Life of Timothy Green,2012,Comedy,51853450 Made of Honor,2008,Romance,46012734 Jersey Boys,2014,Music,47034272 The Rainmaker,1997,Drama,45856732 Gothika,2003,Thriller,59588068 Amistad,1997,History,44175394 Medicine Man,1992,Romance,45500797 Aliens vs. Predator: Requiem,2007,Horror,41797066 Ri¢hie Ri¢h,1994,Family,38087756 Autumn in New York,2000,Romance,37752931 Paul,2011,Comedy,37371385 The Guilt Trip,2012,Comedy,37101011 Scream 4,2011,Mystery,38176892 8MM,1999,Mystery,36283504 The Doors,1991,Music,35183792 Sex Tape,2014,Comedy,38543473 Hanging Up,2000,Drama,36037909 Final Destination 5,2011,Horror,42575718 Mickey Blue Eyes,1999,Romance,33864342 Pay It Forward,2000,Drama,33508922 Fever Pitch,2005,Sport,42071069 Drillbit Taylor,2008,Comedy,32853640 A Million Ways to Die in the West,2014,Western,42615685 The Shadow,1994,Adventure,32055248 Extremely Loud & Incredibly Close,2011,Mystery,31836745 Morning Glory,2010,Drama,30993544 Get Rich or Die Tryin',2005,Biography,30981850 The Art of War,2000,Adventure,30199105 Rent,2005,Drama,29077547 Bless the Child,2000,Drama,29374178 The Out-of-Towners,1999,Comedy,28535768 The Island of Dr. Moreau,1996,Sci-Fi,27663982 The Musketeer,2001,Action,27053815 The Other Boleyn Girl,2008,Drama,26814957 Sweet November,2001,Drama,25178165 The Reaping,2007,Thriller,25117498 Mean Streets,1973,Drama,32645 Renaissance Man,1994,Comedy,24332324 Colombiana,2011,Crime,36665854 The Magic Sword: Quest for Camelot,1998,Family,22717758 City by the Sea,2002,Thriller,22433915 At First Sight,1999,Drama,22326247 Torque,2004,Comedy,21176322 City Hall,1996,Drama,20300000 Marie Antoinette,2006,Drama,15962471 Kiss of Death,1995,Thriller,14942422 Get Carter,2000,Drama,14967182 The Impossible,2012,Thriller,18996755 Ishtar,1987,Action,14375181 Fantastic Mr. 
Fox,2009,Crime,20999103 Life or Something Like It,2002,Romance,14448589 Memoirs of an Invisible Man,1992,Comedy,14358033 Amélie,2001,Comedy,33201661 New York Minute,2004,Comedy,14018364 Alfie,2004,Romance,13395939 Big Miracle,2012,Romance,20113965 The Deep End of the Ocean,1999,Drama,13376506 Feardotcom,2002,Thriller,13208023 Cirque du Freak: The Vampire's Assistant,2009,Fantasy,13838130 Victor Frankenstein,2015,Horror,5773519 Duplex,2003,Comedy,9652000 Raise the Titanic,1980,Adventure,7000000 Universal Soldier: The Return,1999,Action,10431220 Pandorum,2009,Action,10326062 Impostor,2001,Mystery,6114237 Extreme Ops,2002,Thriller,4835968 Just Visiting,2001,Fantasy,4777007 Sunshine,2007,Thriller,3675072 A Thousand Words,2012,Drama,18438149 Delgo,2008,Adventure,511920 The Gunman,2015,Action,10640645 Alex Rider: Operation Stormbreaker,2006,Adventure,652526 Disturbia,2007,Drama,80050171 Hackers,1995,Thriller,7564000 The Hunting Party,2007,Thriller,876671 The Hudsucker Proxy,1994,Fantasy,2869369 The Warlords,2007,History,128978 Nomad: The Warrior,2005,War,77231 Snowpiercer,2013,Thriller,4563029 The Crow,1994,Fantasy,50693162 The Time Traveler's Wife,2009,Fantasy,63411478 The Fast and the Furious,2001,Crime,144512310 Frankenweenie,2012,Horror,35287788 Serenity,2005,Thriller,25335935 Against the Ropes,2004,Romance,5881504 Superman III,1983,Sci-Fi,60000000 Grudge Match,2013,Comedy,29802761 Red Cliff,2008,History,626809 Sweet Home Alabama,2002,Romance,127214072 The Ugly Truth,2009,Romance,88915214 Sgt. 
Bilko,1996,Comedy,30400000 Spy Kids 2: Island of Lost Dreams,2002,Action,85570368 Star Trek: Generations,1994,Thriller,75668868 The Grandmaster,2013,Drama,6594136 Water for Elephants,2011,Romance,58700247 The Hurricane,1999,Drama,50668906 Enough,2002,Crime,39177215 Heartbreakers,2001,Crime,40334024 Paul Blart: Mall Cop 2,2015,Action,71038190 Angel Eyes,2001,Drama,24044532 Joe Somebody,2001,Comedy,22770864 The Ninth Gate,1999,Thriller,18653746 Extreme Measures,1996,Thriller,17305211 Rock Star,2001,Drama,16991902 Precious,2009,Drama,47536959 White Squall,1996,Adventure,10300000 The Thing,1982,Mystery,13782838 Riddick,2013,Action,41997790 Switchback,1997,Mystery,6482195 Texas Rangers,2001,Action,623374 City of Ember,2008,Family,7871693 The Master,2012,Drama,16377274 The Express,2008,Drama,9589875 The 5th Wave,2016,Thriller,34912982 Creed,2015,Sport,109712885 The Town,2010,Thriller,92173235 What to Expect When You're Expecting,2012,Comedy,41102171 Burn After Reading,2008,Drama,60338891 Nim's Island,2008,Adventure,48006503 Rush,2013,Action,26903709 Magnolia,1999,Drama,22450975 Cop Out,2010,Crime,44867349 How to Be Single,2016,Romance,46813366 Dolphin Tale,2011,Drama,72279690 Twilight,2008,Romance,191449475 John Q,2002,Thriller,71026631 Blue Streak,1999,Thriller,68208190 We're the Millers,2013,Comedy,150368971 Breakdown,1997,Thriller,50129186 Never Say Never Again,1983,Action,55500000 Hot Tub Time Machine,2010,Sci-Fi,50213619 Dolphin Tale 2,2014,Family,42019483 Reindeer Games,2000,Family,23360779 A Man Apart,2003,Action,26183197 Aloha,2015,Drama,20991497 Ghosts of Mississippi,1996,Drama,13052741 Snow Falling on Cedars,1999,Drama,14378353 The Rite,2011,Mystery,33037754 Gattaca,1997,Drama,12339633 Isn't She Great,2000,Biography,2954405 Space Chimps,2008,Animation,30105968 Head of State,2003,Comedy,37788228 The Hangover,2009,Comedy,277313371 Ip Man 3,2015,History,2126511 Austin Powers: The Spy Who Shagged Me,1999,Comedy,205399422 Batman,1989,Action,251188924 There Be 
Dragons,2011,War,1068392 Lethal Weapon 3,1992,Crime,144731527 The Blind Side,2009,Biography,255950375 Spy Kids,2001,Adventure,112692062 Horrible Bosses,2011,Crime,117528646 True Grit,2010,Adventure,171031347 The Devil Wears Prada,2006,Comedy,124732962 Star Trek: The Motion Picture,1979,Mystery,82300000 Identity Thief,2013,Comedy,134455175 Cape Fear,1991,Thriller,79100000 21,2008,Thriller,81159365 Trainwreck,2015,Romance,110008260 Guess Who,2005,Comedy,67962333 The English Patient,1996,War,78651430 L.A. Confidential,1997,Crime,64604977 Sky High,2005,Comedy,63939454 In & Out,1997,Comedy,63826569 Species,1995,Thriller,60054449 A Nightmare on Elm Street,1984,Horror,26505000 The Cell,2000,Horror,61280963 The Man in the Iron Mask,1998,Action,56876365 Secretariat,2010,Sport,59699513 TMNT,2007,Comedy,54132596 Radio,2003,Sport,52277485 Friends with Benefits,2011,Comedy,55802754 Neighbors 2: Sorority Rising,2016,Comedy,55291815 Saving Mr. Banks,2013,History,83299761 Malcolm X,1992,History,48169908 This Is 40,2012,Comedy,67523385 Old Dogs,2009,Comedy,49474048 Underworld: Rise of the Lycans,2009,Fantasy,45802315 License to Wed,2007,Comedy,43792641 The Benchwarmers,2006,Sport,57651794 Must Love Dogs,2005,Romance,43894863 Donnie Brasco,1997,Crime,41954997 Resident Evil,2002,Horror,39532308 Poltergeist,1982,Fantasy,76600000 The Ladykillers,2004,Comedy,39692139 Max Payne,2008,Crime,40687294 In Time,2011,Thriller,37553932 The Back-up Plan,2010,Comedy,37481242 Something Borrowed,2011,Comedy,39026186 Black Knight,2001,Adventure,33422806 Street Fighter,1994,Action,33423521 The Pianist,2002,War,32519322 From Hell,2001,Thriller,31598308 The Nativity Story,2006,Drama,37617947 House of Wax,2005,Horror,32048809 Closer,2004,Drama,33987757 J. 
Edgar,2011,Drama,37304950 Mirrors,2008,Horror,30691439 Queen of the Damned,2002,Horror,30307804 Predator 2,1990,Sci-Fi,30669413 Untraceable,2008,Crime,28687835 Blast from the Past,1999,Comedy,26494611 Jersey Girl,2004,Comedy,25266129 Alex Cross,2012,Thriller,25863915 Midnight in the Garden of Good and Evil,1997,Mystery,25078937 Nanny McPhee Returns,2010,Fantasy,28995450 Hoffa,1992,Biography,24276500 The X Files: I Want to Believe,2008,Drama,20981633 Ella Enchanted,2004,Fantasy,22913677 Concussion,2015,Drama,34531832 Abduction,2011,Thriller,28064226 Valiant,2005,Adventure,19447478 Wonder Boys,2000,Drama,19389454 Superhero Movie,2008,Sci-Fi,25871834 Broken City,2013,Thriller,19692608 Cursed,2005,Comedy,19294901 Premium Rush,2012,Action,20275446 Hot Pursuit,2015,Comedy,34507079 The Four Feathers,2002,Romance,18306166 Parker,2013,Action,17609982 Wimbledon,2004,Romance,16831505 Furry Vengeance,2010,Family,17596256 Lions for Lambs,2007,Thriller,14998070 Flight of the Intruder,1991,Action,14587732 Walk Hard: The Dewey Cox Story,2007,Comedy,18317151 The Shipping News,2001,Drama,11405825 American Outlaws,2001,Action,13264986 The Young Victoria,2009,History,10991381 Whiteout,2009,Action,10268846 The Tree of Life,2011,Drama,13303319 Knock Off,1998,Action,10076136 Sabotage,2014,Action,10499968 The Order,2003,Mystery,7659747 Punisher: War Zone,2008,Action,7948159 Zoom,2006,Family,11631245 The Walk,2015,Biography,10137502 Warriors of Virtue,1997,Action,6448817 A Good Year,2006,Comedy,7458269 Radio Flyer,1992,Drama,4651977 "Blood In, Blood Out",1993,Drama,4496583 Smilla's Sense of Snow,1997,Thriller,2221994 Femme Fatale,2002,Thriller,6592103 Ride with the Devil,1999,War,630779 The Maze Runner,2014,Thriller,102413606 Unfinished Business,2015,Comedy,10214013 The Age of Innocence,1993,Romance,32000000 The Fountain,2006,Drama,10139254 Chill Factor,1999,Comedy,11227940 Stolen,2012,Thriller,183125 Ponyo,2008,Fantasy,15081783 The Longest Ride,2015,Romance,37432299 The Astronaut's 
Wife,1999,Sci-Fi,10654581 I Dreamed of Africa,2000,Romance,6543194 Playing for Keeps,2012,Romance,13101142 Mandela: Long Walk to Freedom,2013,Biography,8324748 A Few Good Men,1992,Drama,141340178 Exit Wounds,2001,Drama,51758599 Big Momma's House,2000,Comedy,117559438 The Darkest Hour,2011,Thriller,21426805 Step Up Revolution,2012,Romance,35057332 Snakes on a Plane,2006,Action,34014398 The Watcher,2000,Horror,28927720 The Punisher,2004,Crime,33682273 Goal! The Dream Begins,2005,Romance,4280577 Safe,2012,Crime,17120019 Pushing Tin,1999,Comedy,8406264 Star Wars: Episode VI - Return of the Jedi,1983,Sci-Fi,309125409 Doomsday,2008,Action,10955425 The Reader,2008,Romance,34180954 Elf,2003,Family,173381405 Phenomenon,1996,Fantasy,104632573 Snow Dogs,2002,Comedy,81150692 Scrooged,1988,Drama,60328558 Nacho Libre,2006,Comedy,80197993 Bridesmaids,2011,Romance,169076745 This Is the End,2013,Fantasy,101470202 Stigmata,1999,Horror,50041732 Men of Honor,2000,Biography,48814909 Takers,2010,Crime,57744720 The Big Wedding,2013,Comedy,21784432 "Big Mommas: Like Father, Like Son",2011,Comedy,37911876 Source Code,2011,Mystery,54696902 Alive,1993,Adventure,36733909 The Number 23,2007,Thriller,35063732 The Young and Prodigious T.S. Spivet,2013,Family,99462 Dreamer: Inspired by a True Story,2005,Drama,32701088 A History of Violence,2005,Crime,31493782 Transporter 2,2005,Crime,43095600 The Quick and the Dead,1995,Thriller,18636537 Laws of Attraction,2004,Comedy,17848322 Bringing Out the Dead,1999,Drama,16640210 Repo Men,2010,Thriller,13763130 Dragon Wars: D-War,2007,Horror,10956379 Bogus,1996,Fantasy,4357000 The Incredible Burt Wonderstone,2013,Comedy,22525921 Cats Don't Dance,1997,Fantasy,3562749 Cradle Will Rock,1999,Drama,2899970 The Good German,2006,Thriller,1304837 Apocalypse Now,1979,War,78800000 Going the Distance,2010,Comedy,17797316 Mr. 
Holland's Opus,1995,Drama,82528097 Criminal,2016,Thriller,14268533 Out of Africa,1985,Romance,87100000 Flight,2012,Thriller,93749203 Moonraker,1979,Sci-Fi,62700000 The Grand Budapest Hotel,2014,Crime,59073773 Hearts in Atlantis,2001,Mystery,24185781 Arachnophobia,1990,Fantasy,53133888 Frequency,2000,Sci-Fi,44983704 Ghostbusters,2016,Fantasy,118099659 Vacation,2015,Comedy,58879132 Get Shorty,1995,Crime,72077000 Chicago,2002,Musical,170684505 Big Daddy,1999,Comedy,163479795 American Pie 2,2001,Comedy,145096820 Toy Story,1995,Comedy,191796233 Speed,1994,Thriller,121248145 The Vow,2012,Drama,125014030 Extraordinary Measures,2010,Drama,11854694 Remember the Titans,2000,Biography,115648585 The Hunt for Red October,1990,Action,122012643 Lee Daniels' The Butler,2013,Biography,116631310 Dodgeball: A True Underdog Story,2004,Comedy,114324072 The Addams Family,1991,Fantasy,113502246 Ace Ventura: When Nature Calls,1995,Comedy,108360000 The Princess Diaries,2001,Comedy,108244774 The First Wives Club,1996,Comedy,105444419 Se7en,1995,Crime,100125340 District 9,2009,Sci-Fi,115646235 The SpongeBob SquarePants Movie,2004,Animation,85416609 Mystic River,2003,Mystery,90135191 Million Dollar Baby,2004,Sport,100422786 Analyze This,1999,Crime,106694016 The Notebook,2004,Drama,64286 27 Dresses,2008,Romance,76806312 Hannah Montana: The Movie,2009,Romance,79566871 Rugrats in Paris: The Movie,2000,Comedy,76501438 The Prince of Tides,1991,Romance,74787599 Legends of the Fall,1994,War,66528842 Up in the Air,2009,Romance,83813460 About Schmidt,2002,Comedy,65010106 Warm Bodies,2013,Romance,66359959 Looper,2012,Crime,66468315 Down to Earth,2001,Comedy,64172251 Babe,1995,Drama,66600000 Hope Springs,2012,Romance,63536011 Forgetting Sarah Marshall,2008,Romance,62877175 Four Brothers,2005,Thriller,74484168 Baby Mama,2008,Comedy,60269340 Hope Floats,1998,Romance,60033780 Bride Wars,2009,Comedy,58715510 Without a Paddle,2004,Adventure,58156435 13 Going on 30,2004,Romance,56044241 Midnight in 
Paris,2011,Comedy,56816662 The Nut Job,2014,Adventure,64238770 Blow,2001,Drama,52937130 Message in a Bottle,1999,Drama,52799004 Star Trek V: The Final Frontier,1989,Thriller,55210049 Like Mike,2002,Sport,51432423 Naked Gun 33 1/3: The Final Insult,1994,Crime,51109400 A View to a Kill,1985,Adventure,50300000 The Curse of the Were-Rabbit,2005,Mystery,56068547 P.S. I Love You,2007,Drama,53680848 Atonement,2007,Mystery,50921738 Letters to Juliet,2010,Romance,53021560 Black Rain,1989,Action,45645204 Corpse Bride,2005,Romance,53337608 Sicario,2015,Mystery,46875468 Southpaw,2015,Drama,52418902 Drag Me to Hell,2009,Thriller,42057340 The Age of Adaline,2015,Drama,42478175 Secondhand Lions,2003,Drama,41407470 Step Up 3D,2010,Music,42385520 Blue Crush,2002,Romance,40118420 Stranger Than Fiction,2006,Fantasy,40137776 30 Days of Night,2007,Horror,39568996 The Cabin in the Woods,2012,Fantasy,42043633 Meet the Spartans,2008,Comedy,38232624 Midnight Run,1988,Action,38413606 The Running Man,1987,Action,38122105 Little Shop of Horrors,1986,Sci-Fi,38747385 Hanna,2011,Thriller,40247512 Mortal Kombat: Annihilation,1997,Fantasy,35927406 Larry Crowne,2011,Comedy,35565975 Carrie,2013,Horror,35266619 Take the Lead,2006,Music,34703228 Gridiron Gang,2006,Sport,38432823 What's the Worst That Could Happen?,2001,Crime,32095318 9,2009,Mystery,31743332 Side Effects,2013,Crime,32154410 Winnie the Pooh,2011,Animation,26687172 Dumb and Dumberer: When Harry Met Lloyd,2003,Comedy,26096584 Bulworth,1998,Drama,26525834 Get on Up,2014,Biography,30513940 One True Thing,1998,Drama,23209440 Virtuosity,1995,Thriller,24048000 My Super Ex-Girlfriend,2006,Sci-Fi,22526144 Deliver Us from Evil,2014,Thriller,30523568 Sanctum,2011,Adventure,23070045 Little Black Book,2004,Comedy,20422207 The Five-Year Engagement,2012,Romance,28644770 Mr 3000,2004,Drama,21800302 The Next Three Days,2010,Drama,21129348 Ultraviolet,2006,Thriller,18500966 Assault on Precinct 13,2005,Action,19976073 The Replacement 
Killers,1998,Thriller,18967571 Fled,1996,Romance,17100000 Eight Legged Freaks,2002,Horror,17266505 Love & Other Drugs,2010,Comedy,32357532 88 Minutes,2007,Thriller,16930884 North Country,2005,Drama,18324242 The Whole Ten Yards,2004,Thriller,16323969 Splice,2009,Sci-Fi,16999046 Howard the Duck,1986,Romance,16295774 Pride and Glory,2008,Crime,15709385 The Cave,2005,Thriller,14888028 Alex & Emma,2003,Comedy,14208384 Wicker Park,2004,Thriller,12831121 Fright Night,2011,Horror,18298649 The New World,2005,History,12712093 Wing Commander,1999,Sci-Fi,11576087 In Dreams,1999,Thriller,11900000 Dragonball: Evolution,2009,Thriller,9353573 The Last Stand,2013,Crime,12026670 Godsend,2004,Drama,14334645 Chasing Liberty,2004,Romance,12189514 Hoodwinked Too! Hood vs. Evil,2011,Animation,10134754 An Unfinished Life,2005,Drama,8535575 The Imaginarium of Doctor Parnassus,2009,Fantasy,7689458 Runner Runner,2013,Crime,19316646 Antitrust,2001,Thriller,10965209 Glory,1989,War,26830000 Once Upon a Time in America,1984,Crime,5300000 Dead Man Down,2013,Thriller,10880926 The Merchant of Venice,2004,Drama,3752725 The Good Thief,2002,Crime,3517797 Miss Potter,2006,Biography,2975649 The Promise,2005,Fantasy,668171 DOA: Dead or Alive,2006,Adventure,480314 The Assassination of Jesse James by the Coward Robert Ford,2007,History,3904982 1911,2011,History,127437 Machine Gun Preacher,2011,Biography,537580 Pitch Perfect 2,2015,Comedy,183436380 Walk the Line,2005,Biography,119518352 Keeping the Faith,2000,Drama,37036404 The Borrowers,1997,Family,22359293 Frost/Nixon,2008,Drama,18593156 Serving Sara,2002,Comedy,16930185 The Boss,2016,Comedy,63034755 Cry Freedom,1987,Biography,5899797 Mumford,1999,Drama,4554569 Seed of Chucky,2004,Comedy,17016190 The Jacket,2005,Drama,6301131 Aladdin,1992,Animation,217350219 Straight Outta Compton,2015,Crime,161029270 Indiana Jones and the Temple of Doom,1984,Adventure,179870271 The Rugrats Movie,1998,Drama,100491683 Along Came a Spider,2001,Drama,74058698 Once Upon a 
Time in Mexico,2003,Thriller,55845943 Die Hard,1988,Action,81350242 Role Models,2008,Comedy,67266300 The Big Short,2015,Biography,70235322 Taking Woodstock,2009,Comedy,7443007 Miracle,2004,Sport,64371181 Dawn of the Dead,2004,Thriller,58885635 The Wedding Planner,2001,Romance,60400856 The Royal Tenenbaums,2001,Comedy,52353636 Identity,2003,Thriller,51475962 Last Vegas,2013,Romance,63910583 For Your Eyes Only,1981,Thriller,62300000 Serendipity,2001,Comedy,49968653 Timecop,1994,Thriller,44450000 Zoolander,2001,Comedy,45162741 Safe Haven,2013,Thriller,71346930 Hocus Pocus,1993,Family,39514713 No Reservations,2007,Romance,43097652 Kick-Ass,2010,Comedy,48043505 30 Minutes or Less,2011,Action,37053924 Dracula 2000,2000,Action,33000377 "Alexander and the Terrible, Horrible, No Good, Very Bad Day",2014,Family,66950483 Pride & Prejudice,2005,Romance,38372662 Blade Runner,1982,Thriller,27000000 Rob Roy,1995,Biography,31600000 3 Days to Kill,2014,Drama,30688364 We Own the Night,2007,Thriller,28563179 Lost Souls,2000,Drama,16779636 Just My Luck,2006,Romance,17324744 "Mystery, Alaska",1999,Comedy,8888143 The Spy Next Door,2010,Action,24268828 A Simple Wish,1997,Fantasy,8119205 Ghosts of Mars,2001,Action,8434601 Our Brand Is Crisis,2015,Comedy,6998324 Pride and Prejudice and Zombies,2016,Romance,10907291 Kundun,1997,Drama,5532301 How to Lose Friends & Alienate People,2008,Drama,2775593 Kick-Ass 2,2013,Comedy,28751715 Brick Mansions,2014,Action,20285518 Octopussy,1983,Adventure,67900000 Knocked Up,2007,Comedy,148734225 My Sister's Keeper,2009,Drama,49185998 "Welcome Home, Roscoe Jenkins",2008,Comedy,42168445 A Passage to India,1984,History,26400000 Notes on a Scandal,2006,Crime,17508670 Rendition,2007,Drama,9664316 Star Trek VI: The Undiscovered Country,1991,Action,74888996 Divine Secrets of the Ya-Ya Sisterhood,2002,Drama,69586544 The Jungle Book,2016,Drama,362645141 Kiss the Girls,1997,Drama,60491560 The Blues Brothers,1980,Crime,54200000 Joyful Noise,2012,Music,30920167 About 
a Boy,2002,Comedy,40566655 Lake Placid,1999,Action,31768374 Lucky Number Slevin,2006,Mystery,22494487 The Right Stuff,1983,Drama,21500000 Anonymous,2011,Drama,4463292 Dark City,1998,Drama,14337579 The Duchess,2008,Biography,13823741 The Newton Boys,1998,Western,10297897 Case 39,2009,Mystery,13248477 Suspect Zero,2004,Mystery,8712564 Martian Child,2007,Family,7486906 Spy Kids: All the Time in the World in 4D,2011,Comedy,38536376 Money Monster,2016,Thriller,41008532 Formula 51,2001,Thriller,5204007 Flawless,1999,Crime,4485485 Mindhunters,2004,Crime,4476235 What Just Happened,2008,Drama,1089365 The Statement,2003,Thriller,763044 Paul Blart: Mall Cop,2009,Action,20819129 Freaky Friday,2003,Romance,110222438 The 40-Year-Old Virgin,2005,Comedy,109243478 Shakespeare in Love,1998,Drama,100241322 A Walk Among the Tombstones,2014,Mystery,25977365 Kindergarten Cop,1990,Action,91457688 Pineapple Express,2008,Crime,87341380 Ever After: A Cinderella Story,1998,Comedy,65703412 Open Range,2003,Western,58328680 Flatliners,1990,Sci-Fi,61490000 A Bridge Too Far,1977,War,50800000 Red Eye,2005,Mystery,57859105 Final Destination 2,2003,Horror,46455802 "O Brother, Where Art Thou?",2000,Adventure,45506619 Legion,2010,Action,40168080 Pain & Gain,2013,Crime,49874933 In Good Company,2004,Romance,45489752 Clockstoppers,2002,Action,36985501 Silverado,1985,Action,33200000 Brothers,2009,Thriller,28501651 Agent Cody Banks 2: Destination London,2004,Family,23222861 New Year's Eve,2011,Comedy,54540525 Original Sin,2001,Romance,16252765 The Raven,2012,Thriller,16005978 Welcome to Mooseport,2004,Romance,14469428 Highlander: The Final Dimension,1994,Fantasy,13829734 Blood and Wine,1996,Drama,1075288 The Curse of the Jade Scorpion,2001,Comedy,7496522 Flipper,1996,Adventure,20047715 Self/less,2015,Mystery,12276810 The Constant Gardener,2005,Romance,33565375 The Passion of the Christ,2004,Drama,499263 Mrs. 
Doubtfire,1993,Comedy,219200000 Rain Man,1988,Drama,172825435 Gran Torino,2008,Drama,148085755 W.,2008,Biography,25517500 Taken,2008,Action,145000989 The Best of Me,2014,Romance,26761283 The Bodyguard,1992,Action,121945720 Schindler's List,1993,Biography,96067179 The Help,2011,Drama,169705587 The Fifth Estate,2013,Biography,3254172 Scooby-Doo 2: Monsters Unleashed,2004,Comedy,84185387 Freddy vs. Jason,2003,Thriller,82163317 Jimmy Neutron: Boy Genius,2001,Sci-Fi,80920948 Cloverfield,2008,Adventure,80034302 Teenage Mutant Ninja Turtles II: The Secret of the Ooze,1991,Adventure,78656813 The Untouchables,1987,Thriller,76270454 No Country for Old Men,2007,Drama,74273505 Ride Along,2014,Action,134141530 Bridget Jones's Diary,2001,Comedy,71500556 Chocolat,2000,Romance,71309760 "Legally Blonde 2: Red, White & Blonde",2003,Comedy,89808372 Parental Guidance,2012,Comedy,77264926 No Strings Attached,2011,Comedy,70625986 Tombstone,1993,Romance,56505065 Romeo Must Die,2000,Action,55973336 Final Destination 3,2006,Horror,54098051 The Lucky One,2012,Drama,60443237 Bridge to Terabithia,2007,Family,82234139 Finding Neverland,2004,Family,51676606 A Madea Christmas,2013,Comedy,52528330 The Grey,2011,Thriller,51533608 Hide and Seek,2005,Horror,51097664 Anchorman: The Legend of Ron Burgundy,2004,Comedy,84136909 Goodfellas,1990,Drama,46836394 Agent Cody Banks,2003,Adventure,47285499 Nanny McPhee,2005,Fantasy,47124400 Scarface,1983,Crime,44700000 Nothing to Lose,1997,Adventure,44455658 The Last Emperor,1987,Biography,43984230 Contraband,2012,Drama,66489425 Money Talks,1997,Comedy,41067398 There Will Be Blood,2007,Drama,40218903 The Wild Thornberrys Movie,2002,Animation,39880476 Rugrats Go Wild,2003,Musical,39399750 Undercover Brother,2002,Action,38230435 The Sisterhood of the Traveling Pants,2005,Romance,39008741 Kiss of the Dragon,2001,Crime,36833473 The House Bunny,2008,Romance,48237389 Million Dollar Arm,2014,Sport,36447959 The Giver,2014,Romance,45089048 What a Girl 
Wants,2003,Drama,35990505 Jeepers Creepers II,2003,Horror,35143332 Good Luck Chuck,2007,Romance,35000629 Cradle 2 the Grave,2003,Crime,34604054 The Hours,2002,Drama,41597830 She's the Man,2006,Romance,33687630 Mr. Bean's Holiday,2007,Family,32553210 Anacondas: The Hunt for the Blood Orchid,2004,Horror,31526393 Blood Ties,2013,Drama,41229 August Rush,2007,Drama,31655091 Elizabeth,1998,History,30012990 Bride of Chucky,1998,Horror,32368960 Tora! Tora! Tora!,1970,Action,14500000 Spice World,1997,Music,29247405 Dance Flick,2009,Music,25615792 The Shawshank Redemption,1994,Crime,28341469 Crocodile Dundee in Los Angeles,2001,Adventure,25590119 Kingpin,1996,Comedy,24944213 The Gambler,2014,Drama,33631221 August: Osage County,2013,Drama,37738400 A Lot Like Love,2005,Romance,21835784 Eddie the Eagle,2016,Drama,15785632 He Got Game,1998,Sport,21554585 Don Juan DeMarco,1994,Romance,22200000 The Losers,2010,Mystery,23527955 Don't Be Afraid of the Dark,2010,Horror,24042490 War,2007,Thriller,22466994 Punch-Drunk Love,2002,Comedy,17791031 EuroTrip,2004,Comedy,17718223 Half Past Dead,2002,Crime,15361537 Unaccompanied Minors,2006,Adventure,16647384 "Bright Lights, Big City",1988,Drama,16118077 The Adventures of Pinocchio,1996,Adventure,15091542 The Box,2009,Thriller,15045676 The Ruins,2008,Horror,17427926 The Next Best Thing,2000,Comedy,14983572 My Soul to Take,2010,Mystery,14637490 The Girl Next Door,2004,Comedy,14589444 Maximum Risk,1996,Romance,14095303 Stealing Harvard,2002,Crime,13973532 Legend,2015,Crime,1865774 Shark Night 3D,2011,Thriller,18860403 Angela's Ashes,1999,Drama,13038660 Draft Day,2014,Sport,28831145 The Conspirator,2010,Crime,11538204 Lords of Dogtown,2005,Sport,11008432 The 33,2015,Drama,12188642 Big Trouble in Little China,1986,Adventure,11100000 Warrior,2011,Sport,13651662 Michael Collins,1996,Biography,11030963 Gettysburg,1993,Drama,10769960 Stop-Loss,2008,War,10911750 Abandon,2002,Mystery,10719367 Brokedown Palace,1999,Mystery,10114315 The 
Possession,2012,Horror,49122319 Mrs. Winterbourne,1996,Romance,10070000 Straw Dogs,2011,Action,10324441 The Hoax,2006,Drama,7156933 Stone Cold,1991,Thriller,9286314 The Road,2009,Adventure,56692 Underclassman,2005,Thriller,5654777 Say It Isn't So,2001,Comedy,5516708 The World's Fastest Indian,2005,Sport,5128124 Snakes on a Plane,2006,Action,34014398 Tank Girl,1995,Action,4064333 King's Ransom,2005,Crime,4006906 Blindness,2008,Thriller,3073392 BloodRayne,2005,Action,1550000 Where the Truth Lies,2005,Mystery,871527 Without Limits,1998,Sport,777423 Me and Orson Welles,2008,Drama,1186957 The Best Offer,2013,Crime,85433 Bad Lieutenant: Port of Call New Orleans,2009,Crime,1697956 Little White Lies,2010,Comedy,183662 Love Ranch,2010,Sport,134904 The Counselor,2013,Drama,16969390 Dangerous Liaisons,1988,Drama,34700000 On the Road,2012,Adventure,717753 Star Trek IV: The Voyage Home,1986,Sci-Fi,109713132 Rocky Balboa,2006,Drama,70269171 Point Break,2015,Sport,28772222 Scream 2,1997,Horror,101334374 Jane Got a Gun,2016,Drama,1512815 Think Like a Man Too,2014,Comedy,65182182 The Whole Nine Yards,2000,Comedy,57262492 Footloose,1984,Music,80000000 Old School,2003,Comedy,74608545 The Fisher King,1991,Comedy,41895491 I Still Know What You Did Last Summer,1998,Mystery,39989008 Return to Me,2000,Romance,32662299 Zack and Miri Make a Porno,2008,Romance,31452765 Nurse Betty,2000,Comedy,25167270 The Men Who Stare at Goats,2009,War,32416109 Double Take,2001,Crime,20218 "Girl, Interrupted",1999,Biography,28871190 Win a Date with Tad Hamilton!,2004,Comedy,16964743 Muppets from Space,1999,Comedy,16290976 The Wiz,1978,Music,13000000 Ready to Rumble,2000,Sport,12372410 Play It to the Bone,1999,Drama,8427204 I Don't Know How She Does It,2011,Comedy,9639242 Piranha 3D,2010,Horror,25003072 Beyond the Sea,2004,Drama,6144806 The Princess and the Cobbler,1993,Animation,669276 The Bridge of San Luis Rey,2004,Drama,42880 Faster,2010,Crime,23225911 Howl's Moving Castle,2004,Adventure,4710455 
Zombieland,2009,Sci-Fi,75590286 King Kong,2005,Drama,218051260 The Waterboy,1998,Comedy,161487252 Star Wars: Episode V - The Empire Strikes Back,1980,Fantasy,290158751 Bad Boys,1995,Crime,65807024 The Naked Gun 2½: The Smell of Fear,1991,Comedy,86930411 Final Destination,2000,Thriller,53302314 The Ides of March,2011,Drama,40962534 Pitch Black,2000,Horror,39235088 Someone Like You...,2001,Romance,27338033 Her,2013,Drama,25556065 Eddie the Eagle,2016,Sport,15785632 Joy Ride,2001,Thriller,21973182 The Adventurer: The Curse of the Midas Box,2013,Fantasy,4756 Anywhere But Here,1999,Drama,18653615 Chasing Liberty,2004,Romance,12189514 The Crew,2000,Crime,13019253 Haywire,2011,Thriller,18934858 Jaws: The Revenge,1987,Horror,20763013 Marvin's Room,1996,Drama,12782508 The Longshots,2008,Family,11508423 The End of the Affair,1999,Drama,10660147 Harley Davidson and the Marlboro Man,1991,Western,7434726 Coco Before Chanel,2009,Biography,6109075 Chéri,2009,Drama,2708188 Vanity Fair,2004,Drama,16123851 1408,2007,Horror,71975611 Spaceballs,1987,Comedy,38119483 The Water Diviner,2014,Drama,4190530 Ghost,1990,Fantasy,217631306 There's Something About Mary,1998,Romance,176483808 The Santa Clause,1994,Fantasy,144833357 The Rookie,2002,Sport,75597042 The Game Plan,2007,Sport,90636983 The Bridges of Madison County,1995,Drama,70960517 The Animal,2001,Comedy,55762229 The Hundred-Foot Journey,2014,Comedy,54235441 The Net,1995,Mystery,50728000 I Am Sam,2001,Drama,40270895 Son of God,2014,History,59696176 Underworld,2003,Fantasy,51483949 Derailed,2005,Drama,36020063 The Informant!,2009,Drama,33313582 Shadowlands,1993,Drama,25842000 Deuce Bigalow: European Gigolo,2005,Comedy,22264487 Delivery Man,2013,Drama,30659817 Victor Frankenstein,2015,Drama,5773519 Saving Silverman,2001,Comedy,19351569 Diary of a Wimpy Kid: Dog Days,2012,Comedy,49002815 Summer of Sam,1999,Thriller,19283782 Jay and Silent Bob Strike Back,2001,Comedy,30059386 The Island,2005,Sci-Fi,35799026 The Glass 
House,2001,Thriller,17951431 "Hail, Caesar!",2016,Comedy,29997095 Josie and the Pussycats,2001,Comedy,14252830 Homefront,2013,Action,19783777 The Little Vampire,2000,Adventure,13555988 I Heart Huckabees,2004,Comedy,12784713 RoboCop 3,1993,Crime,10696210 Megiddo: The Omega Code 2,2001,Action,5974653 Darling Lili,1970,Drama,5000000 Dudley Do-Right,1999,Romance,9694105 The Transporter Refueled,2015,Thriller,16027866 Black Book,2006,War,4398392 Joyeux Noel,2005,Music,1050445 Hit and Run,2012,Action,13746550 Mad Money,2008,Thriller,20668843 Before I Go to Sleep,2014,Mystery,2963012 Stone,2010,Thriller,1796024 Molière,2007,Comedy,634277 Out of the Furnace,2013,Crime,11326836 Michael Clayton,2007,Thriller,49024969 My Fellow Americans,1996,Comedy,22294341 Arlington Road,1999,Crime,24362501 To Rome with Love,2012,Comedy,16684352 Firefox,1982,Action,46700000 South Park: Bigger Longer & Uncut,1999,Fantasy,52008288 Death at a Funeral,2007,Comedy,8579684 Teenage Mutant Ninja Turtles III,1993,Fantasy,42660000 Hardball,2001,Sport,40219708 Silver Linings Playbook,2012,Romance,132088910 Freedom Writers,2007,Crime,36581633 The Transporter,2002,Action,25296447 Never Back Down,2008,Sport,24848292 The Rage: Carrie 2,1999,Thriller,17757087 Away We Go,2009,Drama,9430988 Swing Vote,2008,Drama,16284360 Moonlight Mile,2002,Romance,6830957 Tinker Tailor Soldier Spy,2011,Drama,24104113 Molly,1999,Drama,15593 The Beaver,2011,Drama,958319 The Best Little Whorehouse in Texas,1982,Comedy,69700000 eXistenZ,1999,Horror,2840417 Raiders of the Lost Ark,1981,Action,242374454 Home Alone 2: Lost in New York,1992,Comedy,173585516 Close Encounters of the Third Kind,1977,Sci-Fi,128300000 Pulse,2006,Thriller,20259297 Beverly Hills Cop II,1987,Comedy,153665036 Bringing Down the House,2003,Comedy,132541238 The Silence of the Lambs,1991,Crime,130727000 Wayne's World,1992,Comedy,121697350 Jackass 3D,2010,Comedy,117224271 Jaws 2,1978,Thriller,102922376 Beverly Hills Chihuahua,2008,Comedy,94497271 The 
Conjuring,2013,Thriller,137387272 Are We There Yet?,2005,Family,82301521 Tammy,2014,Comedy,84518155 Disturbia,2007,Drama,80050171 School of Rock,2003,Music,81257845 Mortal Kombat,1995,Thriller,70360285 Wicker Park,2004,Drama,12831121 White Chicks,2004,Crime,69148997 The Descendants,2011,Drama,82624961 Holes,2003,Family,67325559 The Last Song,2010,Romance,62933793 12 Years a Slave,2013,Biography,56667870 Drumline,2002,Music,56398162 Why Did I Get Married Too?,2010,Romance,60072596 Edward Scissorhands,1990,Romance,56362352 Me Before You,2016,Romance,56154094 Madea's Witness Protection,2012,Crime,65623128 Date Movie,2006,Romance,48546578 Return to Never Land,2002,Adventure,48423368 Selma,2014,Drama,52066000 The Jungle Book 2,2003,Animation,47887943 Boogeyman,2005,Thriller,46363118 Premonition,2007,Drama,47852604 The Tigger Movie,2000,Drama,45542421 Max,2015,Family,42652003 Epic Movie,2007,Comedy,39737645 Conan the Barbarian,1982,Adventure,37567440 Spotlight,2015,History,44988180 Lakeview Terrace,2008,Crime,39263506 The Grudge 2,2006,Horror,39143839 How Stella Got Her Groove Back,1998,Drama,37672350 Bill & Ted's Bogus Journey,1991,Music,38037513 Man of the Year,2006,Comedy,37442180 The American,2010,Crime,35596227 Selena,1997,Music,35422828 Vampires Suck,2010,Comedy,36658108 Babel,2006,Drama,34300771 This Is Where I Leave You,2014,Comedy,34290142 Doubt,2008,Drama,33422556 Team America: World Police,2004,Comedy,32774834 Texas Chainsaw 3D,2013,Thriller,34334256 Copycat,1995,Drama,32051917 Scary Movie 5,2013,Comedy,32014289 Milk,2008,Drama,31838002 Risen,2016,Mystery,36874745 Ghost Ship,2002,Horror,30079316 A Very Harold & Kumar 3D Christmas,2011,Comedy,35033759 Wild Things,1998,Mystery,29753944 The Debt,2010,Drama,31146570 High Fidelity,2000,Drama,27277055 One Missed Call,2008,Mystery,26876529 Eye for an Eye,1996,Crime,53146000 The Bank Job,2008,Romance,30028592 Eternal Sunshine of the Spotless Mind,2004,Drama,34126138 You Again,2010,Family,25677801 Street 
Kings,2008,Drama,26415649 The World's End,2013,Comedy,26003149 Nancy Drew,2007,Comedy,25584685 Daybreakers,2009,Thriller,29975979 She's Out of My League,2010,Comedy,31584722 Monte Carlo,2011,Family,23179303 Stay Alive,2006,Thriller,23078294 Quigley Down Under,1990,Drama,21413105 Alpha and Omega,2010,Comedy,25077977 The Covenant,2006,Fantasy,23292105 Shorts,2009,Family,20916309 To Die For,1995,Drama,21200000 Vampires,1998,Action,20241395 Psycho,1960,Mystery,32000000 My Best Friend's Girl,2008,Romance,19151864 Endless Love,2014,Romance,23393765 Georgia Rule,2007,Comedy,18882880 Under the Rainbow,1981,Comedy,8500000 Simon Birch,1998,Drama,18252684 Reign Over Me,2007,Drama,19661987 Into the Wild,2007,Biography,18352454 School for Scoundrels,2006,Comedy,17803796 Silent Hill: Revelation 3D,2012,Horror,17529157 From Dusk Till Dawn,1996,Crime,25753840 Pooh's Heffalump Movie,2005,Animation,18081626 Home for the Holidays,1995,Comedy,17518220 Kung Fu Hustle,2004,Action,17104669 The Country Bears,2002,Family,16988996 The Kite Runner,2007,Drama,15797907 21 Grams,2003,Drama,16248701 Paparazzi,2004,Crime,15712072 Twilight,2008,Romance,191449475 A Guy Thing,2003,Romance,15408822 Loser,2000,Comedy,15464026 The Greatest Story Ever Told,1965,History,8000000 Disaster Movie,2008,Comedy,14174654 Armored,2009,Thriller,15988876 The Man Who Knew Too Little,1997,Thriller,13801755 What's Your Number?,2011,Romance,13987482 Lockout,2012,Thriller,14291570 Envy,2004,Comedy,12181484 Crank: High Voltage,2009,Crime,13630226 Bullets Over Broadway,1994,Crime,13383737 One Night with the King,2006,Drama,13391174 The Quiet American,2002,War,12987647 The Weather Man,2005,Drama,12469811 Undisputed,2002,Action,12398628 Ghost Town,2008,Fantasy,13214030 12 Rounds,2009,Action,12232937 Let Me In,2010,Horror,12134420 3 Ninjas Kick Back,1994,Action,11784000 Be Kind Rewind,2008,Comedy,11169531 Mrs Henderson Presents,2005,War,11034436 Triple 9,2016,Crime,12626905 Deconstructing Harry,1997,Comedy,10569071 Three to 
Tango,1999,Romance,10544143 Burnt,2015,Comedy,13650738 We're No Angels,1989,Comedy,10555348 Everyone Says I Love You,1996,Musical,9714482 Death at a Funeral,2007,Comedy,8579684 Death Sentence,2007,Crime,9525276 Everybody's Fine,2009,Adventure,8855646 Superbabies: Baby Geniuses 2,2004,Family,9109322 The Man,2005,Action,8326035 Code Name: The Cleaner,2007,Crime,8104069 Connie and Carla,2004,Comedy,8054280 Inherent Vice,2014,Romance,8093318 Doogal,2006,Adventure,7382993 Battle of the Year,2013,Music,8888355 An American Carol,2008,Comedy,7001720 Machete Kills,2013,Action,7268659 Willard,2003,Horror,6852144 Strange Wilderness,2008,Adventure,6563357 Topsy-Turvy,1999,Drama,6201757 A Dangerous Method,2011,Thriller,5702083 A Scanner Darkly,2006,Mystery,5480996 Chasing Mavericks,2012,Sport,6002756 Alone in the Dark,2005,Sci-Fi,5132655 Bandslam,2009,Family,5205343 Birth,2004,Thriller,5005883 A Most Violent Year,2014,Crime,5749134 Flash of Genius,2008,Drama,4234040 I'm Not There.,2007,Drama,4001121 The Cold Light of Day,2012,Thriller,3749061 The Brothers Bloom,2008,Drama,3519627 "Synecdoche, New York",2008,Drama,3081925 Princess Mononoke,1997,Adventure,2298191 Bon voyage,2003,Mystery,2353728 Can't Stop the Music,1980,Musical,2000000 The Proposition,2005,Western,1900725 Courage,2015,Biography,2246000 Marci X,2003,Comedy,1646664 Equilibrium,2002,Thriller,1190018 The Children of Huang Shi,2008,War,1027749 The Yards,2000,Crime,882710 By the Sea,2015,Drama,531009 Steamboy,2004,Family,410388 The Game of Their Lives,2005,Drama,375474 Rapa Nui,1994,History,305070 Dylan Dog: Dead of Night,2010,Crime,1183354 People I Know,2002,Drama,121972 The Tempest,2010,Fantasy,263365 The Painted Veil,2006,Romance,8047690 The Baader Meinhof Complex,2008,Drama,476270 Dances with Wolves,1990,Adventure,184208848 Bad Teacher,2011,Comedy,100292856 Sea of Love,1989,Crime,58571513 A Cinderella Story,2004,Family,51431160 Scream,1996,Mystery,103001286 Thir13en Ghosts,2001,Horror,41867960 Back to the 
Future,1985,Sci-Fi,210609762 House on Haunted Hill,1999,Horror,40846082 I Can Do Bad All by Myself,2009,Comedy,51697449 The Switch,2010,Romance,27758465 Just Married,2003,Romance,56127162 The Devil's Double,2011,Biography,1357042 Thomas and the Magic Railroad,2000,Comedy,15911333 The Crazies,2010,Thriller,39103378 Spirited Away,2001,Family,10049886 The Bounty,1984,Adventure,8600000 The Book Thief,2013,Drama,21483154 Sex Drive,2008,Adventure,8396942 Leap Year,2010,Comedy,12561 Take Me Home Tonight,2011,Romance,6923891 The Nutcracker,1993,Fantasy,2119994 Kansas City,1996,Drama,1292527 The Amityville Horror,2005,Thriller,64255243 Adaptation.,2002,Drama,22245861 Land of the Dead,2005,Horror,20433940 Fear and Loathing in Las Vegas,1998,Comedy,10562387 The Invention of Lying,2009,Comedy,18439082 Neighbors,2014,Comedy,150056505 The Mask,1994,Action,119938730 Big,1988,Fantasy,114968774 Borat: Cultural Learnings of America for Make Benefit Glorious Nation of Kazakhstan,2006,Comedy,128505958 Legally Blonde,2001,Romance,95001351 Star Trek III: The Search for Spock,1984,Action,76400000 The Exorcism of Emily Rose,2005,Drama,75072454 Deuce Bigalow: Male Gigolo,1999,Romance,65535067 Left Behind,2014,Thriller,13998282 The Family Stone,2005,Comedy,6061759 Barbershop 2: Back in Business,2004,Drama,64955956 Bad Santa,2003,Drama,60057639 Austin Powers: International Man of Mystery,1997,Comedy,53868030 My Big Fat Greek Wedding 2,2016,Family,59573085 Diary of a Wimpy Kid: Rodrick Rules,2011,Comedy,52691009 Predator,1987,Sci-Fi,59735548 Amadeus,1984,History,51600000 Prom Night,2008,Horror,43818159 Mean Girls,2004,Comedy,86049418 Under the Tuscan Sun,2003,Romance,43601508 Gosford Park,2001,Mystery,41300105 Peggy Sue Got Married,1986,Comedy,41382841 Birdman or (The Unexpected Virtue of Ignorance),2014,Comedy,42335698 Blue Jasmine,2013,Drama,33404871 United 93,2006,History,31471430 Honey,2003,Drama,30222640 Glory,1989,History,26830000 Spy Hard,1996,Action,26906039 The 
Fog,1980,Fantasy,21378000 Soul Surfer,2011,Sport,43853424 Observe and Report,2009,Crime,23993605 Conan the Destroyer,1984,Fantasy,26400000 Raging Bull,1980,Drama,45250 Love Happens,2009,Drama,22927390 Young Sherlock Holmes,1985,Thriller,4250320 Fame,2009,Musical,22452209 127 Hours,2010,Thriller,18329466 Small Time Crooks,2000,Comedy,17071230 Center Stage,2000,Drama,17174870 Love the Coopers,2015,Comedy,26284475 Catch That Kid,2004,Comedy,16702864 Life as a House,2001,Drama,15561627 Steve Jobs,2015,Biography,17750583 "I Love You, Beth Cooper",2009,Comedy,14793904 Youth in Revolt,2009,Romance,15281286 The Legend of the Lone Ranger,1981,Western,8000000 The Tailor of Panama,2001,Thriller,13491653 Getaway,2013,Crime,10494494 The Ice Storm,1997,Drama,7837632 And So It Goes,2014,Drama,15155772 Troop Beverly Hills,1989,Comedy,8508843 Being Julia,2004,Drama,7739049 9½ Weeks,1986,Romance,6734844 Dragonslayer,1981,Adventure,6000000 The Last Station,2009,Drama,6615578 Ed Wood,1994,Biography,5887457 Labor Day,2013,Drama,13362308 Mongol: The Rise of Genghis Khan,2007,Biography,5701643 RocknRolla,2008,Crime,5694401 Megaforce,1982,Action,5333658 Hamlet,1996,Drama,4414535 Midnight Special,2016,Thriller,3707794 Anything Else,2003,Romance,3203044 The Railway Man,2013,Biography,4435083 The White Ribbon,2009,Drama,2222647 The Wraith,1986,Romance,3500000 The Salton Sea,2002,Drama,676698 One Man's Hero,1999,Western,229311 Renaissance,2006,Thriller,63260 Superbad,2007,Comedy,121463226 Step Up 2: The Streets,2008,Romance,58006147 Hoodwinked!,2005,Comedy,51053787 Hotel Rwanda,2004,Drama,23472900 Hitman,2007,Action,39687528 Black Nativity,2013,Family,7017178 City of Ghosts,2002,Crime,325491 The Others,2001,Horror,96471845 Aliens,1986,Action,85200000 My Fair Lady,1964,Romance,72000000 I Know What You Did Last Summer,1997,Mystery,72219395 Let's Be Cops,2014,Comedy,82389560 Sideways,2004,Adventure,71502303 Beerfest,2006,Comedy,19179969 Halloween,1978,Thriller,47000000 Hero,2002,Action,84961 
Good Boy!,2003,Drama,37566230 The Best Man Holiday,2013,Comedy,70492685 Smokin' Aces,2006,Action,35635046 Saw 3D: The Final Chapter,2010,Mystery,45670855 40 Days and 40 Nights,2002,Romance,37939782 TRON: Legacy,2010,Action,172051787 A Night at the Roxbury,1998,Romance,30324946 Beastly,2011,Fantasy,27854896 The Hills Have Eyes,2006,Horror,41777564 Dickie Roberts: Former Child Star,2003,Comedy,22734486 "McFarland, USA",2015,Biography,44469602 Pitch Perfect,2012,Comedy,64998368 Summer Catch,2001,Comedy,19693891 A Simple Plan,1998,Drama,16311763 They,2002,Horror,12693621 Larry the Cable Guy: Health Inspector,2006,Comedy,15655665 The Adventures of Elmo in Grouchland,1999,Comedy,11634458 Brooklyn's Finest,2009,Drama,27154426 Evil Dead,2013,Horror,54239856 My Life in Ruins,2009,Romance,8662318 American Dreamz,2006,Music,7156725 Superman IV: The Quest for Peace,1987,Sci-Fi,15681020 Running Scared,2006,Drama,6855137 Shanghai Surprise,1986,Romance,2315683 The Illusionist,2006,Mystery,39825798 Roar,1981,Thriller,2000000 Veronica Guerin,2003,Crime,1569918 Southland Tales,2006,Thriller,273420 The Apparition,2012,Horror,4930798 My Girl,1991,Romance,59847242 Fur: An Imaginary Portrait of Diane Arbus,2006,Drama,220914 The Illusionist,2006,Drama,39825798 Wall Street,1987,Crime,43848100 Sense and Sensibility,1995,Drama,42700000 Becoming Jane,2007,Drama,18663911 Sydney White,2007,Comedy,11702090 House of Sand and Fog,2003,Drama,13005485 Dead Poets Society,1989,Drama,95860116 Dumb & Dumber,1994,Comedy,127175354 When Harry Met Sally...,1989,Romance,92823600 The Verdict,1982,Drama,54000000 Road Trip,2000,Comedy,68525609 Varsity Blues,1999,Sport,52885587 The Artist,2011,Comedy,44667095 The Unborn,2009,Fantasy,42638165 Moonrise Kingdom,2012,Comedy,45507053 The Texas Chainsaw Massacre: The Beginning,2006,Horror,39511038 The Young Messiah,2016,Drama,6462576 The Master of Disguise,2002,Family,40363530 Pan's Labyrinth,2006,War,37623143 See Spot Run,2001,Action,33357476 Baby 
Boy,2001,Crime,28734552 The Roommate,2011,Horror,37300107 Joe Dirt,2001,Comedy,27087695 Double Impact,1991,Crime,30102717 Hot Fuzz,2007,Action,23618786 The Women,2008,Drama,26896744 Vicky Cristina Barcelona,2008,Drama,23213577 Boys and Girls,2000,Drama,20627372 White Oleander,2002,Drama,16346122 Jennifer's Body,2009,Comedy,16204793 Drowning Mona,2000,Mystery,15427192 Radio Days,1987,Comedy,14792779 Left Behind,2014,Fantasy,13998282 Remember Me,2010,Romance,19057024 How to Deal,2003,Drama,14108518 My Stepmother Is an Alien,1988,Sci-Fi,13854000 Philadelphia,1993,Drama,77324422 The Thirteenth Floor,1999,Thriller,15500000 Duets,2000,Music,4734235 Hollywood Ending,2002,Romance,4839383 Detroit Rock City,1999,Comedy,4193025 Highlander,1986,Action,5900000 Things We Lost in the Fire,2007,Drama,2849142 Steel,1997,Crime,1686429 The Immigrant,2013,Drama,1984743 The White Countess,2005,History,1666262 Trance,2013,Thriller,2319187 Soul Plane,2004,Comedy,13922211 Good,2008,Romance,23091 Enter the Void,2009,Fantasy,336467 Vamps,2012,Romance,2964 The Homesman,2014,Drama,2428883 Juwanna Mann,2002,Drama,13571817 Slow Burn,2005,Thriller,1181197 Wasabi,2001,Drama,81525 Slither,2006,Comedy,7774730 Beverly Hills Cop,1984,Action,234760500 Home Alone,1990,Family,285761243 3 Men and a Baby,1987,Comedy,167780960 Tootsie,1982,Comedy,177200000 Top Gun,1986,Romance,176781728 "Crouching Tiger, Hidden Dragon",2000,Action,128067808 American Beauty,1999,Drama,130058047 The King's Speech,2010,History,138795342 Twins,1988,Crime,111936400 The Yellow Handkerchief,2008,Romance,317040 The Color Purple,1985,Drama,94175854 The Imitation Game,2014,War,91121452 Private Benjamin,1980,War,69800000 Diary of a Wimpy Kid,2010,Family,64001297 Mama,2013,Horror,71588220 Halloween,1978,Thriller,47000000 National Lampoon's Vacation,1983,Comedy,61400000 Bad Grandpa,2013,Comedy,101978840 The Queen,2006,Biography,56437947 Beetlejuice,1988,Fantasy,73326666 Why Did I Get Married?,2007,Comedy,55184721 Little 
Women,1994,Family,50003300 The Woman in Black,2012,Horror,54322273 When a Stranger Calls,2006,Thriller,47860214 Big Fat Liar,2002,Adventure,47811275 Wag the Dog,1997,Drama,43022524 The Lizzie McGuire Movie,2003,Romance,42672630 Snitch,2013,Action,42919096 Krampus,2015,Fantasy,42592530 The Faculty,1998,Sci-Fi,40064955 Cop Land,1997,Thriller,44886089 Not Another Teen Movie,2001,Comedy,37882551 End of Watch,2012,Drama,40983001 Aloha,2015,Romance,20991497 The Skulls,2000,Action,35007180 The Theory of Everything,2014,Romance,35887263 Malibu's Most Wanted,2003,Crime,34308901 Where the Heart Is,2000,Drama,33771174 Lawrence of Arabia,1962,History,6000000 Halloween II,2009,Horror,33386128 Wild,2014,Biography,37877959 The Last House on the Left,2009,Crime,32721635 The Wedding Date,2005,Romance,31585300 Halloween: Resurrection,2002,Comedy,30259652 Clash of the Titans,2010,Adventure,163192114 The Princess Bride,1987,Adventure,30857814 The Great Debaters,2007,Drama,30226144 Drive,2011,Crime,35054909 Confessions of a Teenage Drama Queen,2004,Comedy,29302097 The Object of My Affection,1998,Drama,29106737 28 Weeks Later,2007,Horror,28637507 When the Game Stands Tall,2014,Family,30127963 Because of Winn-Dixie,2005,Comedy,32645546 Love & Basketball,2000,Drama,27441122 Grosse Pointe Blank,1997,Crime,28014536 All About Steve,2009,Comedy,33860010 Book of Shadows: Blair Witch 2,2000,Mystery,26421314 The Craft,1996,Horror,24881000 Match Point,2005,Thriller,23089926 Ramona and Beezus,2010,Family,26161406 The Remains of the Day,1993,Drama,22954968 Boogie Nights,1997,Drama,26384919 Nowhere to Run,1993,Drama,22189039 Flicka,2006,Family,20998709 The Hills Have Eyes II,2007,Horror,20801344 Urban Legends: Final Cut,2000,Thriller,21468807 Tuck Everlasting,2002,Fantasy,19158074 The Marine,2006,Thriller,18843314 Keanu,2016,Comedy,20566327 Country Strong,2010,Music,20218921 Disturbing Behavior,1998,Sci-Fi,17411331 The Place Beyond the Pines,2012,Crime,21383298 The November 
Man,2014,Thriller,24984868 Eye of the Beholder,1999,Mystery,16459004 The Hurt Locker,2008,Drama,15700000 Firestarter,1984,Sci-Fi,15100000 Killing Them Softly,2012,Crime,14938570 A Most Wanted Man,2014,Thriller,17237244 Freddy Got Fingered,2001,Comedy,14249005 The Pirates Who Don't Do Anything: A VeggieTales Movie,2008,Animation,12701880 Highlander: Endgame,2000,Sci-Fi,12801190 Idlewild,2006,Romance,12549485 One Day,2011,Drama,13766014 Whip It,2009,Sport,13034417 Confidence,2003,Crime,12212417 The Muse,1999,Comedy,11614236 De-Lovely,2004,Drama,13337299 New York Stories,1989,Drama,10763469 Barney's Great Adventure,1998,Family,11144518 The Man with the Iron Fists,2012,Action,15608545 Home Fries,1998,Drama,10443316 Here on Earth,2000,Romance,10494147 Brazil,1985,Drama,9929000 Raise Your Voice,2004,Music,10411980 The Big Lebowski,1998,Comedy,17439163 Black Snake Moan,2006,Music,9396487 Dark Blue,2002,Crime,9059588 A Mighty Heart,2007,Thriller,9172810 Whatever It Takes,2000,Drama,8735529 Boat Trip,2002,Comedy,8586376 The Importance of Being Earnest,2002,Comedy,8378141 Hoot,2006,Family,8080116 In Bruges,2008,Crime,7757130 Peeples,2013,Romance,9123834 The Rocker,2008,Music,6409206 Post Grad,2009,Comedy,6373693 Promised Land,2012,Drama,7556708 Whatever Works,2009,Comedy,5306447 The In Crowd,2000,Thriller,5217498 Three Burials,2005,Crime,5023275 Jakob the Liar,1999,Drama,4956401 Kiss Kiss Bang Bang,2005,Comedy,4235837 Idle Hands,1999,Comedy,4002955 Mulholland Drive,2001,Drama,7219578 You Will Meet a Tall Dark Stranger,2010,Comedy,3247816 Never Let Me Go,2010,Sci-Fi,2412045 Transsiberian,2008,Drama,2203641 The Clan of the Cave Bear,1986,Drama,1953732 Crazy in Alabama,1999,Comedy,1954202 Funny Games,2007,Crime,1294640 Metropolis,1927,Drama,26435 District B13,2004,Crime,1197786 Things to Do in Denver When You're Dead,1995,Drama,529766 The Assassin,2015,Drama,613556 Buffalo Soldiers,2001,Crime,353743 Ong-bak 2,2008,Action,102055 The Midnight Meat Train,2008,Fantasy,73548 The Son 
of No One,2011,Drama,28870 All the Queen's Men,2001,Action,22723 The Good Night,2007,Drama,20380 Groundhog Day,1993,Fantasy,70906973 Magic Mike XXL,2015,Music,66009973 Romeo + Juliet,1996,Drama,46338728 Sarah's Key,2010,Drama,7691700 Unforgiven,1992,Western,101157447 Manderlay,2005,Drama,74205 Slumdog Millionaire,2008,Drama,141319195 Fatal Attraction,1987,Romance,156645693 Pretty Woman,1990,Romance,178406268 Crocodile Dundee II,1988,Action,109306210 Born on the Fourth of July,1989,Biography,70001698 Cool Runnings,1993,Adventure,68856263 My Bloody Valentine,2009,Horror,51527787 The Possession,2012,Thriller,49122319 Stomp the Yard,2007,Drama,61356221 The Spy Who Loved Me,1977,Sci-Fi,46800000 Urban Legend,1998,Thriller,38048637 Dangerous Liaisons,1988,Romance,34700000 White Fang,1991,Drama,34793160 Superstar,1999,Romance,30628981 The Iron Lady,2011,Drama,29959436 Jonah: A VeggieTales Movie,2002,Animation,25571351 Poetic Justice,1993,Drama,27515786 All About the Benjamins,2002,Crime,25482931 Vampire in Brooklyn,1995,Horror,19900000 An American Haunting,2005,Horror,16298046 My Boss's Daughter,2003,Comedy,15549702 A Perfect Getaway,2009,Adventure,15483540 Our Family Wedding,2010,Comedy,20246959 Dead Man on Campus,1998,Comedy,15062898 Tea with Mussolini,1999,Comedy,14348123 Thinner,1996,Fantasy,15171475 Crooklyn,1994,Drama,13640000 Jason X,2001,Thriller,12610731 Big Fat Liar,2002,Comedy,47811275 Bobby,2006,History,11204499 Head Over Heels,2001,Romance,10397365 Fun Size,2012,Adventure,9402410 Little Children,2006,Drama,5459824 Gossip,2000,Thriller,5108820 A Walk on the Moon,1999,Drama,4741987 Catch a Fire,2006,Biography,4291965 Soul Survivors,2001,Drama,3100650 Jefferson in Paris,1995,History,2474000 Caravans,1978,Adventure,1000000 Mr. 
Turner,2014,Drama,3958500 Amen.,2002,Biography,274299 The Lucky Ones,2008,Drama,183088 Margaret,2011,Drama,46495 Flipped,2010,Drama,1752214 Brokeback Mountain,2005,Romance,83025853 Teenage Mutant Ninja Turtles,2014,Action,190871240 Clueless,1995,Romance,56631572 Far from Heaven,2002,Drama,15854988 Hot Tub Time Machine 2,2015,Comedy,12282677 Quills,2000,Drama,7060876 Seven Psychopaths,2012,Comedy,14989761 Downfall,2004,Drama,5501940 The Sea Inside,2004,Drama,2086345 "Good Morning, Vietnam",1987,Biography,123922370 The Last Godfather,2010,Comedy,163591 Justin Bieber: Never Say Never,2011,Music,73000942 Black Swan,2010,Drama,106952327 RoboCop,2014,Action,58607007 The Godfather: Part II,1974,Drama,57300000 Save the Last Dance,2001,Drama,91038276 A Nightmare on Elm Street 4: The Dream Master,1988,Horror,49369900 Miracles from Heaven,2016,Drama,61693523 "Dude, Where's My Car?",2000,Comedy,46729374 Young Guns,1988,Western,44726644 St. Vincent,2014,Comedy,44134898 About Last Night,2014,Comedy,48637684 10 Things I Hate About You,1999,Drama,38176108 The New Guy,2002,Comedy,28972187 Loaded Weapon 1,1993,Crime,27979400 The Shallows,2016,Thriller,54257433 The Butterfly Effect,2004,Thriller,23947 Snow Day,2000,Comedy,60008303 This Christmas,2007,Romance,49121934 Baby Geniuses,1999,Crime,27141959 The Big Hit,1998,Comedy,27052167 Harriet the Spy,1996,Drama,26539321 Child's Play 2,1990,Horror,28501605 No Good Deed,2014,Crime,52543632 The Mist,2007,Horror,25592632 Ex Machina,2015,Drama,25440971 Being John Malkovich,1999,Comedy,22858926 Two Can Play That Game,2001,Comedy,22235901 Earth to Echo,2014,Family,38916903 Crazy/Beautiful,2001,Romance,16929123 Letters from Iwo Jima,2006,History,13753931 The Astronaut Farmer,2006,Drama,10996440 Room,2015,Drama,14677654 Dirty Work,1998,Comedy,9975684 Serial Mom,1994,Thriller,7881335 Dick,1999,Comedy,6241697 Light It Up,1999,Thriller,5871603 54,1998,Music,16574731 Bubble Boy,2001,Comedy,5002310 Birthday Girl,2001,Crime,4919896 21 & 
Over,2013,Comedy,25675765 "Paris, je t'aime",2006,Romance,4857376 Resurrecting the Champ,2007,Drama,3169424 Admission,2013,Romance,18004225 The Widow of Saint-Pierre,2000,Drama,3058380 Chloe,2009,Mystery,3074838 Faithful,1996,Drama,2104000 Brothers,2009,Drama,28501651 Find Me Guilty,2006,Crime,1172769 The Perks of Being a Wallflower,2012,Drama,17738570 Excessive Force,1993,Action,1200000 Infamous,2006,Crime,1150403 The Claim,2000,Drama,403932 The Vatican Tapes,2015,Thriller,1712111 Attack the Block,2011,Thriller,1024175 In the Land of Blood and Honey,2011,Drama,301305 The Call,2013,Thriller,51872378 The Crocodile Hunter: Collision Course,2002,Comedy,28399192 I Love You Phillip Morris,2009,Romance,2035566 Antwone Fisher,2002,Biography,21078145 The Emperor's Club,2002,Drama,14060950 True Romance,1993,Thriller,12281500 Glengarry Glen Ross,1992,Crime,10725228 The Killer Inside Me,2010,Drama,214966 Sorority Row,2009,Horror,11956207 Lars and the Real Girl,2007,Romance,5949693 The Boy in the Striped Pajamas,2008,Drama,9030581 Dancer in the Dark,2000,Musical,4157491 Oscar and Lucinda,1997,Romance,1508689 The Funeral,1996,Crime,1227324 Solitary Man,2009,Romance,4360548 Machete,2010,Thriller,26589953 Casino Jack,2010,Comedy,1039869 The Land Before Time,1988,Adventure,48092846 Tae Guk Gi: The Brotherhood of War,2004,Action,1110186 The Perfect Game,2009,Drama,1089445 The Exorcist,1973,Horror,204565000 Jaws,1975,Adventure,260000000 American Pie,1999,Comedy,101736215 Ernest & Celestine,2012,Crime,71442 The Golden Child,1986,Action,79817937 Think Like a Man,2012,Comedy,91547205 Barbershop,2002,Drama,75074950 Star Trek II: The Wrath of Khan,1982,Action,78900000 Ace Ventura: Pet Detective,1994,Comedy,72217000 WarGames,1983,Sci-Fi,79568000 Witness,1985,Romance,65500000 Act of Valor,2012,War,70011073 Step Up,2006,Crime,65269010 Beavis and Butt-Head Do America,1996,Crime,63071133 Jackie Brown,1997,Thriller,39647595 Harold & Kumar Escape from Guantanamo Bay,2008,Comedy,38087366 
Chronicle,2012,Sci-Fi,64572496 Yentl,1983,Drama,30400000 Time Bandits,1981,Sci-Fi,42365600 Crossroads,2002,Drama,37188667 Project X,2012,Comedy,54724272 One Hour Photo,2002,Drama,31597131 Quarantine,2008,Sci-Fi,31691811 The Eye,2008,Mystery,31397498 Johnson Family Vacation,2004,Comedy,31179516 How High,2001,Fantasy,31155435 The Muppet Christmas Carol,1992,Fantasy,27281507 Casino Royale,2006,Thriller,167007184 Frida,2002,Romance,25776062 Katy Perry: Part of Me,2012,Music,25240988 The Fault in Our Stars,2014,Romance,124868837 Rounders,1998,Crime,22905674 Top Five,2014,Romance,25277561 Stir of Echoes,1999,Mystery,21133087 Philomena,2013,Drama,37707719 The Upside of Anger,2005,Comedy,18761993 Aquamarine,2006,Romance,18595716 Paper Towns,2015,Drama,31990064 Nebraska,2013,Drama,17613460 Tales from the Crypt: Demon Knight,1995,Thriller,21088568 Max Keeble's Big Move,2001,Comedy,17292381 Young Adult,2011,Comedy,16300302 Crank,2006,Thriller,27829874 Living Out Loud,1998,Drama,12902790 Das Boot,1981,Adventure,11433134 The Alamo,2004,War,22406362 Sorority Boys,2002,Comedy,10198766 About Time,2013,Romance,15294553 House of Flying Daggers,2004,Adventure,11041228 Arbitrage,2012,Drama,7918283 Project Almanac,2015,Sci-Fi,22331028 Cadillac Records,2008,Music,8134217 Screwed,2000,Comedy,6982680 Fortress,1992,Crime,6739141 For Your Consideration,2006,Comedy,5542025 Celebrity,1998,Drama,5032496 Running with Scissors,2006,Comedy,6754898 From Justin to Kelly,2003,Musical,4922166 Girl 6,1996,Comedy,4903000 In the Cut,2003,Mystery,4717455 Two Lovers,2008,Drama,3148482 Last Orders,2001,Drama,2326407 The Host,2006,Horror,2201412 Ravenous,1999,Fantasy,2060953 Charlie Bartlett,2007,Drama,3950294 The Great Beauty,2013,Drama,2835886 The Dangerous Lives of Altar Boys,2002,Drama,1779284 Stoker,2013,Drama,1702277 2046,2004,Sci-Fi,261481 Married Life,2007,Romance,1506998 Duma,2005,Family,860002 Ondine,2009,Drama,548934 Brother,2000,Drama,447750 Welcome to Collinwood,2002,Comedy,333976 Critical 
Care,1997,Comedy,141853 The Life Before Her Eyes,2007,Drama,303439 Trade,2007,Thriller,214202 Fateless,2005,Romance,195888 Breakfast of Champions,1999,Comedy,175370 City of Life and Death,2009,War,119922 Home,2015,Adventure,177343675 5 Days of War,2011,Action,17149 Snatch,2000,Comedy,30093107 Pet Sematary,1989,Fantasy,57469179 Gremlins,1984,Horror,148170000 Star Wars: Episode IV - A New Hope,1977,Sci-Fi,460935665 Dirty Grandpa,2016,Comedy,35537564 Doctor Zhivago,1965,Drama,111722000 High School Musical 3: Senior Year,2008,Comedy,90556401 The Fighter,2010,Drama,93571803 My Cousin Vinny,1992,Comedy,52929168 If I Stay,2014,Drama,50461335 Major League,1989,Sport,49797148 Phone Booth,2002,Crime,46563158 A Walk to Remember,2002,Drama,41227069 Dead Man Walking,1995,Crime,39025000 Cruel Intentions,1999,Romance,38201895 Saw VI,2009,Mystery,27669413 The Secret Life of Bees,2008,Drama,37766350 Corky Romano,2001,Comedy,23978402 Raising Cain,1992,Drama,21370057 Invaders from Mars,1986,Horror,4884663 Brooklyn,2015,Romance,38317535 Out Cold,2001,Comedy,13903262 The Ladies Man,2000,Comedy,13592872 Quartet,2012,Drama,18381787 Tomcats,2001,Comedy,13558739 Frailty,2001,Thriller,13103828 Woman in Gold,2015,Drama,33305037 Kinsey,2004,Drama,10214647 Army of Darkness,1992,Horror,11501093 Slackers,2002,Comedy,4814244 What's Eating Gilbert Grape,1993,Drama,9170214 The Visual Bible: The Gospel of John,2003,History,4068087 Vera Drake,2004,Drama,3753806 The Guru,2002,Romance,3034181 The Perez Family,1995,Comedy,2832826 Inside Llewyn Davis,2013,Drama,13214255 O,2001,Drama,16017403 Return to the Blue Lagoon,1991,Adventure,2807854 Copying Beethoven,2006,Music,352786 Poltergeist,1982,Horror,76600000 Saw V,2008,Mystery,56729973 Jindabyne,2006,Thriller,399879 Kabhi Alvida Naa Kehna,2006,Drama,3275443 An Ideal Husband,1999,Romance,18535191 The Last Days on Mars,2013,Thriller,23838 Darkness,2002,Horror,22160085 2001: A Space Odyssey,1968,Sci-Fi,56715371 E.T. 
the Extra-Terrestrial,1982,Family,434949459 In the Land of Women,2007,Drama,11043445 For Greater Glory: The True Story of Cristiada,2012,History,5669081 Good Will Hunting,1997,Drama,138339411 Saw III,2006,Horror,80150343 Stripes,1981,Action,85300000 Bring It On,2000,Sport,68353550 The Purge: Election Year,2016,Horror,78845130 She's All That,1999,Romance,63319509 Precious,2009,Drama,47536959 Saw IV,2007,Mystery,63270259 White Noise,2005,Drama,55865715 Madea's Family Reunion,2006,Drama,63231524 The Color of Money,1986,Drama,52293982 The Mighty Ducks,1992,Sport,50752337 The Grudge,2004,Mystery,110175871 Happy Gilmore,1996,Comedy,38624000 Jeepers Creepers,2001,Horror,37470017 Bill & Ted's Excellent Adventure,1989,Comedy,40485039 Oliver!,1968,Musical,16800000 The Best Exotic Marigold Hotel,2011,Drama,46377022 Recess: School's Out,2001,Family,36696761 Mad Max Beyond Thunderdome,1985,Sci-Fi,36200000 The Boy,2016,Thriller,35794166 Devil,2010,Thriller,33583175 Friday After Next,2002,Comedy,32983713 Insidious: Chapter 3,2015,Fantasy,52200504 The Last Dragon,1985,Comedy,33000000 Snatch,2000,Crime,30093107 The Lawnmower Man,1992,Sci-Fi,32101000 Nick and Norah's Infinite Playlist,2008,Music,31487293 Dogma,1999,Adventure,30651422 The Banger Sisters,2002,Comedy,30306281 Twilight Zone: The Movie,1983,Horror,29500000 Road House,1989,Action,30050028 A Low Down Dirty Shame,1994,Comedy,29392418 Swimfan,2002,Thriller,28563926 Employee of the Month,2006,Comedy,28435406 Can't Hardly Wait,1998,Comedy,25339117 The Outsiders,1983,Crime,25600000 Sinister 2,2015,Thriller,27736779 Sparkle,2012,Music,24397469 Valentine,2001,Horror,20384136 The Fourth Kind,2009,Sci-Fi,25464480 A Prairie Home Companion,2006,Music,20338609 Sugar Hill,1993,Thriller,18272447 Rushmore,1998,Comedy,17096053 Skyline,2010,Sci-Fi,21371425 The Second Best Exotic Marigold Hotel,2015,Comedy,33071558 Kit Kittredge: An American Girl,2008,Family,17655201 The Perfect Man,2005,Romance,16247775 Mo' Better Blues,1990,Drama,16153600 
Kung Pow: Enter the Fist,2002,Action,16033556 Tremors,1990,Horror,16667084 Wrong Turn,2003,Thriller,15417771 The Corruptor,1999,Crime,15156200 Mud,2012,Drama,21589307 Reno 911!: Miami,2007,Comedy,20339754 One Direction: This Is Us,2013,Documentary,28873374 Hey Arnold! The Movie,2002,Family,13684949 My Week with Marilyn,2011,Drama,14597405 The Matador,2005,Thriller,12570442 Love Jones,1997,Drama,12514138 The Gift,2015,Mystery,43771291 End of the Spear,2005,Adventure,11703287 Get Over It,2001,Comedy,11560259 Office Space,1999,Comedy,10824921 Drop Dead Gorgeous,1999,Thriller,10561238 Big Eyes,2014,Biography,14479776 Very Bad Things,1998,Comedy,9801782 Sleepover,2004,Romance,8070311 MacGruber,2010,Action,8460995 Dirty Pretty Things,2002,Thriller,8111360 Movie 43,2013,Comedy,8828771 The Tourist,2010,Romance,67631157 Over Her Dead Body,2008,Romance,7563670 Seeking a Friend for the End of the World,2012,Adventure,6619173 American History X,1998,Drama,6712241 The Collection,2012,Thriller,6842058 Teacher's Pet,2004,Comedy,6491350 The Red Violin,1998,Romance,9473382 The Straight Story,1999,Drama,6197866 Deuces Wild,2002,Drama,6044618 Bad Words,2013,Comedy,7764027 Black or White,2014,Drama,21569041 On the Line,2001,Romance,4356743 Rescue Dawn,2006,Drama,5484375 "Jeff, Who Lives at Home",2011,Comedy,4244155 I Am Love,2009,Romance,5004648 Atlas Shrugged II: The Strike,2012,Drama,3333823 Romeo Is Bleeding,1993,Crime,3275585 The Limey,1999,Thriller,3193102 Crash,2004,Thriller,54557348 The House of Mirth,2000,Romance,3041803 Malone,1987,Thriller,3060858 Peaceful Warrior,2006,Drama,1055654 Bucky Larson: Born to Be a Star,2011,Comedy,2331318 Bamboozled,2000,Music,2185266 The Forest,2016,Thriller,26583369 Sphinx,1981,Adventure,800000 While We're Young,2014,Drama,7574066 A Better Life,2011,Drama,1754319 Spider,2002,Drama,1641788 Gun Shy,2000,Comedy,1631839 Nicholas Nickleby,2002,Drama,1309849 The Iceman,2012,Drama,1939441 Cecil B. 
DeMented,2000,Thriller,1276984 Killer Joe,2011,Romance,1987762 The Joneses,2009,Drama,1474508 Owning Mahowny,2003,Drama,1011054 The Brothers Solomon,2007,Comedy,900926 My Blueberry Nights,2007,Drama,866778 Swept Away,2002,Romance,598645 "War, Inc.",2008,Action,578527 Shaolin Soccer,2001,Action,488872 The Brown Bunny,2003,Drama,365734 Rosewater,2014,Biography,3093491 Imaginary Heroes,2004,Drama,228524 High Heels and Low Lifes,2001,Comedy,226792 Severance,2006,Thriller,136432 Edmond,2005,Drama,131617 Police Academy: Mission to Moscow,1994,Crime,126247 An Alan Smithee Film: Burn Hollywood Burn,1997,Comedy,15447 The Open Road,2009,Comedy,19348 The Good Guy,2009,Romance,100503 Motherhood,2009,Drama,92900 Blonde Ambition,2007,Comedy,5561 The Oxford Murders,2008,Thriller,3607 Eulogy,2004,Comedy,70527 "The Good, the Bad, the Weird",2008,Action,128486 The Lost City,2005,Drama,2483955 Next Friday,2000,Comedy,57176582 You Only Live Twice,1967,Adventure,43100000 Amour,2012,Drama,225377 Poltergeist III,1988,Horror,14114488 "It's a Mad, Mad, Mad, Mad World",1963,Comedy,46300000 Richard III,1995,War,2600000 Melancholia,2011,Drama,3029870 Jab Tak Hai Jaan,2012,Drama,3047539 Alien,1979,Sci-Fi,78900000 The Texas Chain Saw Massacre,1974,Horror,30859000 The Runaways,2010,Music,3571735 Fiddler on the Roof,1971,Romance,50000000 Thunderball,1965,Adventure,63600000 Set It Off,1996,Action,36049108 The Best Man,1999,Drama,34074895 Child's Play,1988,Horror,33244684 Sicko,2007,Drama,24530513 The Purge: Anarchy,2014,Horror,71519230 Down to You,2000,Romance,20035310 Harold & Kumar Go to White Castle,2004,Adventure,18225165 The Contender,2000,Drama,17804273 Boiler Room,2000,Thriller,16938179 Black Christmas,2006,Horror,16235293 Henry V,1989,War,10161099 The Way of the Gun,2000,Action,6047856 Igby Goes Down,2002,Drama,4681503 PCU,1994,Comedy,4350774 Gracie,2007,Drama,2955039 Trust the Man,2005,Romance,1530535 Hamlet 2,2008,Comedy,4881867 Glee: The 3D Concert Movie,2011,Music,11860839 The Legend 
of Suriyothai,2001,Adventure,454255 Two Evil Eyes,1990,Horror,349618 All or Nothing,2002,Drama,112935 Princess Kaiulani,2009,Drama,883887 Opal Dream,2006,Drama,13751 Flame and Citron,2008,Drama,145109 Undiscovered,2005,Comedy,1046166 Crocodile Dundee,1986,Comedy,174635000 Awake,2007,Crime,14373825 Skin Trade,2014,Action,162 Crazy Heart,2009,Drama,39462438 The Rose,1979,Romance,29200000 Baggage Claim,2013,Comedy,21564616 Election,1999,Drama,14879556 The DUFF,2015,Comedy,34017854 Glitter,2001,Drama,4273372 Bright Star,2009,Drama,4440055 My Name Is Khan,2010,Drama,4018695 Footloose,1984,Romance,80000000 Limbo,1999,Adventure,1997807 The Karate Kid,1984,Drama,90800000 Repo! The Genetic Opera,2008,Musical,140244 Pulp Fiction,1994,Drama,107930000 Nightcrawler,2014,Thriller,32279955 Club Dread,2004,Thriller,4992159 The Sound of Music,1965,Family,163214286 Splash,1984,Fantasy,69800000 Little Miss Sunshine,2006,Comedy,59889948 Stand by Me,1986,Adventure,52287414 28 Days Later...,2002,Drama,45063889 You Got Served,2004,Drama,40066497 Escape from Alcatraz,1979,Biography,36500000 Brown Sugar,2002,Comedy,27362712 A Thin Line Between Love and Hate,1996,Comedy,34746109 50/50,2011,Romance,34963967 Shutter,2008,Horror,25926543 That Awkward Moment,2014,Romance,26049082 Much Ado About Nothing,1993,Drama,22551000 On Her Majesty's Secret Service,1969,Adventure,22800000 New Nightmare,1994,Fantasy,18090181 Drive Me Crazy,1999,Comedy,17843379 Half Baked,1998,Crime,17278980 New in Town,2009,Comedy,16699684 Syriana,2005,Thriller,50815288 American Psycho,2000,Crime,15047419 The Good Girl,2002,Romance,14015786 The Boondock Saints II: All Saints Day,2009,Crime,10269307 Enough Said,2013,Comedy,17536788 Easy A,2010,Romance,58401464 Shadow of the Vampire,2000,Horror,8279017 Prom,2011,Drama,10106233 Held Up,1999,Comedy,4692814 Woman on Top,2000,Comedy,5018450 Anomalisa,2015,Animation,3442820 Another Year,2010,Comedy,3205244 8 Women,2002,Romance,3076425 Showdown in Little Tokyo,1991,Thriller,2275557 
Clay Pigeons,1998,Crime,1789892 It's Kind of a Funny Story,2010,Comedy,6350058 Made in Dagenham,2010,History,1094798 When Did You Last See Your Father?,2007,Biography,1071240 Prefontaine,1997,Biography,532190 The Secret of Kells,2009,Animation,686383 Begin Again,2013,Drama,16168741 Down in the Valley,2005,Drama,568695 Brooklyn Rules,2007,Crime,398420 The Singing Detective,2003,Comedy,336456 Fido,2006,Horror,298110 The Wendell Baker Story,2005,Comedy,127144 Wild Target,2010,Crime,117190 Pathology,2008,Horror,108662 10th & Wolf,2006,Thriller,53481 Dear Wendy,2004,Romance,23106 Akira,1988,Sci-Fi,439162 Imagine Me & You,2005,Comedy,671240 The Blood of Heroes,1989,Sci-Fi,882290 Driving Miss Daisy,1989,Drama,106593296 Soul Food,1997,Comedy,43490057 Rumble in the Bronx,1995,Action,32333860 Thank You for Smoking,2005,Comedy,24792061 Hostel: Part II,2007,Horror,17544812 An Education,2009,Drama,12574715 The Hotel New Hampshire,1984,Drama,5100000 Narc,2002,Mystery,10460089 Men with Brooms,2002,Romance,4239767 Witless Protection,2008,Crime,4131640 Extract,2009,Crime,10814185 Code 46,2003,Thriller,197148 Crash,2004,Thriller,54557348 Albert Nobbs,2011,Drama,3014541 Persepolis,2007,War,4443403 The Neon Demon,2016,Thriller,1330827 Harry Brown,2009,Action,1818681 Spider-Man 3,2007,Romance,336530303 The Omega Code,1999,Action,12610552 Juno,2007,Drama,143492840 Diamonds Are Forever,1971,Adventure,43800000 The Godfather,1972,Drama,134821952 Flashdance,1983,Music,94900000 500 Days of Summer,2009,Comedy,32391374 The Piano,1993,Drama,40158000 Magic Mike,2012,Comedy,113709992 Darkness Falls,2003,Thriller,32131483 Live and Let Die,1973,Action,35400000 My Dog Skip,2000,Family,34099640 Jumping the Broom,2011,Drama,37295394 The Great Gatsby,2013,Drama,144812796 "Good Night, and Good Luck.",2005,Drama,31501218 Capote,2005,Biography,28747570 Desperado,1995,Thriller,25625110 The Claim,2000,Western,403932 Logan's Run,1976,Sci-Fi,25000000 The Man with the Golden Gun,1974,Adventure,21000000 Action 
Jackson,1988,Comedy,20257000 The Descent,2005,Horror,26005908 Devil's Due,2014,Horror,15818967 Flirting with Disaster,1996,Comedy,14891000 The Devil's Rejects,2005,Crime,16901126 Dope,2015,Drama,17474107 In Too Deep,1999,Drama,14003141 Skyfall,2012,Thriller,304360277 House of 1000 Corpses,2003,Horror,12583510 A Serious Man,2009,Comedy,9190525 Get Low,2009,Mystery,9176553 Warlock,1989,Horror,9094451 A Single Man,2009,Drama,9166863 The Last Temptation of Christ,1988,Drama,8373585 Outside Providence,1999,Romance,7292175 Bride & Prejudice,2004,Musical,6601079 Rabbit-Proof Fence,2002,Biography,6165429 Who's Your Caddy?,2007,Comedy,5694308 Split Second,1992,Crime,5430822 The Other Side of Heaven,2001,Drama,4720371 Redbelt,2008,Sport,2344847 Cyrus,2010,Drama,7455447 A Dog of Flanders,1999,Family,2148212 Auto Focus,2002,Drama,2062066 Factory Girl,2006,Drama,1654367 We Need to Talk About Kevin,2011,Drama,1738692 The Mighty Macs,2009,Sport,1889522 Mother and Child,2009,Drama,1110286 March or Die,1977,Drama,1000000 Les visiteurs,1993,Comedy,700000 Somewhere,2010,Drama,1768416 Chairman of the Board,1998,Comedy,306715 Hesher,2010,Drama,382946 The Heart of Me,2002,Romance,196067 Freeheld,2015,Biography,532988 The Extra Man,2010,Comedy,453079 Ca$h,2010,Crime,46451 Wah-Wah,2005,Drama,233103 Pale Rider,1985,Western,41400000 Dazed and Confused,1993,Comedy,7993039 The Chumscrubber,2005,Comedy,49526 Shade,2003,Thriller,10696 House at the End of the Street,2012,Horror,31607598 Incendies,2010,Drama,6857096 "Remember Me, My Love",2003,Romance,223878 Elite Squad,2007,Crime,8060 Annabelle,2014,Horror,84263837 Bran Nue Dae,2009,Musical,110029 Boyz n the Hood,1991,Drama,57504069 La Bamba,1987,Music,54215416 Dressed to Kill,1980,Romance,31899000 The Adventures of Huck Finn,1993,Family,24103594 Go,1999,Comedy,16842303 Friends with Money,2006,Comedy,13367101 Bats,1999,Thriller,10149779 Nowhere in Africa,2001,Biography,6173485 Layer Cake,2004,Drama,2338695 The Work and the Glory II: American 
Zion,2005,Drama,2024854 The East,2013,Drama,2268296 A Home at the End of the World,2004,Romance,1029017 The Messenger,2009,Drama,66637 Control,2007,Biography,871577 The Terminator,1984,Sci-Fi,38400000 Good Bye Lenin!,2003,Drama,4063859 The Damned United,2009,Drama,449558 Mallrats,1995,Romance,2122561 Grease,1978,Romance,181360000 Platoon,1986,War,137963328 Fahrenheit 9/11,2004,Drama,119078393 Butch Cassidy and the Sundance Kid,1969,Biography,102308900 Mary Poppins,1964,Comedy,102300000 Ordinary People,1980,Drama,54800000 Around the World in 80 Days,2004,Comedy,24004159 West Side Story,1961,Romance,43650000 Caddyshack,1980,Comedy,39800000 The Brothers,2001,Drama,27457409 The Wood,1999,Romance,25047631 The Usual Suspects,1995,Crime,23272306 A Nightmare on Elm Street 5: The Dream Child,1989,Thriller,22168359 Van Wilder: Party Liaison,2002,Romance,21005329 The Wrestler,2008,Drama,26236603 Duel in the Sun,1946,Western,20400000 Best in Show,2000,Comedy,18621249 Escape from New York,1981,Sci-Fi,25244700 School Daze,1988,Comedy,14545844 Daddy Day Camp,2007,Comedy,13235267 Mystic Pizza,1988,Drama,12793213 Sliding Doors,1998,Drama,11883495 Tales from the Hood,1995,Horror,11797927 The Last King of Scotland,2006,Biography,17605861 Halloween 5,1989,Thriller,11642254 Bernie,2011,Crime,9203192 Pollock,2000,Biography,8596914 200 Cigarettes,1999,Drama,6851636 The Words,2012,Mystery,11434867 Casa de mi Padre,2012,Western,5895238 City Island,2009,Drama,6670712 The Guard,2011,Comedy,5359774 College,2008,Comedy,4693919 The Virgin Suicides,1999,Drama,4859475 Miss March,2009,Romance,4542775 Wish I Was Here,2014,Drama,3588432 Simply Irresistible,1999,Romance,4394936 Hedwig and the Angry Inch,2001,Music,3029081 Only the Strong,1993,Action,3273588 Shattered Glass,2003,Drama,2207975 Novocaine,2001,Comedy,2025238 The Wackness,2008,Romance,2077046 Beastmaster 2: Through the Portal of Time,1991,Fantasy,869325 The 5th Quarter,2010,Sport,399611 The Greatest,2009,Romance,115862 Come Early 
Morning,2006,Romance,117560 Lucky Break,2001,Romance,54606 "Surfer, Dude",2008,Comedy,36497 Deadfall,2012,Crime,65804 L'auberge espagnole,2002,Comedy,3895664 Murder by Numbers,2002,Crime,31874869 Winter in Wartime,2008,Drama,542860 The Protector,2005,Drama,11905519 Bend It Like Beckham,2002,Sport,32541719 Sunshine State,2002,Drama,3064356 Crossover,2006,Action,7009668 [Rec] 2,2009,Horror,27024 The Sting,1973,Drama,159600000 Chariots of Fire,1981,Drama,58800000 Diary of a Mad Black Woman,2005,Comedy,50382128 Shine,1996,Romance,35811509 Don Jon,2013,Romance,24475193 Ghost World,2001,Comedy,6200756 Iris,2001,Romance,1292119 The Chorus,2004,Drama,3629758 Mambo Italiano,2003,Comedy,6239558 Wonderland,2003,Thriller,1056102 Do the Right Thing,1989,Drama,27545445 Harvard Man,2001,Thriller,56007 Le Havre,2011,Comedy,611709 R100,2013,Drama,22770 Salvation Boulevard,2011,Action,27445 The Ten,2007,Romance,766487 Headhunters,2011,Drama,1196752 Saint Ralph,2004,Sport,795126 Insidious: Chapter 2,2013,Horror,83574831 Saw II,2005,Mystery,87025093 10 Cloverfield Lane,2016,Thriller,71897215 Jackass: The Movie,2002,Comedy,64267897 Lights Out,2016,Horror,56536016 Paranormal Activity 3,2011,Horror,104007828 Ouija,2014,Fantasy,50820940 A Nightmare on Elm Street 3: Dream Warriors,1987,Action,44793200 The Gift,2015,Mystery,43771291 Instructions Not Included,2013,Drama,44456509 Paranormal Activity 4,2012,Horror,53884821 The Robe,1953,History,36000000 Freddy's Dead: The Final Nightmare,1991,Thriller,34872293 Monster,2003,Crime,34468224 Paranormal Activity: The Marked Ones,2014,Thriller,32453345 Dallas Buyers Club,2013,Drama,27296514 The Lazarus Effect,2015,Sci-Fi,25799043 Memento,2000,Mystery,25530884 Oculus,2013,Horror,27689474 Clerks II,2006,Comedy,24138847 Billy Elliot,2000,Drama,21994911 The Way Way Back,2013,Drama,21501098 House Party 2,1991,Romance,19281235 Doug's 1st Movie,1999,Comedy,19421271 The Apostle,1997,Drama,20733485 Our Idiot Brother,2011,Comedy,24809547 The Players 
Club,1998,Drama,23031390 O,2001,Thriller,16017403 "As Above, So Below",2014,Horror,21197315 Addicted,2014,Drama,17382982 Eve's Bayou,1997,Drama,14821531 Still Alice,2014,Drama,18656400 Friday the 13th Part VIII: Jason Takes Manhattan,1989,Horror,14343976 My Big Fat Greek Wedding,2002,Romance,241437427 Spring Breakers,2012,Drama,14123773 Halloween: The Curse of Michael Myers,1995,Thriller,15126948 Y Tu Mamá También,2001,Adventure,13622333 Shaun of the Dead,2004,Horror,13464388 The Haunting of Molly Hartley,2008,Drama,13350177 Lone Star,1996,Mystery,13269963 Halloween 4: The Return of Michael Myers,1988,Horror,17768000 April Fool's Day,1986,Horror,12947763 Diner,1982,Comedy,14100000 Lone Wolf McQuade,1983,Action,12200000 Apollo 18,2011,Horror,17683670 Sunshine Cleaning,2008,Comedy,12055108 No Escape,2015,Action,27285953 Not Easily Broken,2009,Drama,10572742 Digimon: The Movie,2000,Sci-Fi,9628751 Saved!,2004,Drama,8786715 The Barbarian Invasions,2003,Romance,3432342 The Forsaken,2001,Thriller,6755271 UHF,1989,Drama,6157157 Slums of Beverly Hills,1998,Drama,5480318 Made,2001,Crime,5308707 Moon,2009,Mystery,5009677 The Sweet Hereafter,1997,Drama,4306697 Of Gods and Men,2010,Drama,3950029 Bottle Shock,2008,Drama,4040588 Heavenly Creatures,1994,Drama,3049135 90 Minutes in Heaven,2015,Drama,4700361 Everything Must Go,2010,Comedy,2711210 Zero Effect,1998,Comedy,1980338 The Machinist,2004,Thriller,1082044 Light Sleeper,1992,Drama,1100000 Kill the Messenger,2014,Drama,2445646 Rabbit Hole,2010,Drama,2221809 Party Monster,2003,Thriller,296665 Green Room,2015,Thriller,3219029 Bottle Rocket,1996,Drama,1040879 Albino Alligator,1996,Thriller,326308 "Lovely, Still",2008,Drama,124720 Desert Blue,1998,Drama,99147 Redacted,2007,Crime,65087 Fascination,2004,Thriller,16066 I Served the King of England,2006,Comedy,617228 Sling Blade,1996,Drama,24475416 Hostel,2005,Horror,47277326 Tristram Shandy: A Cock and Bull Story,2005,Drama,1247453 Take Shelter,2011,Thriller,1729969 Lady in 
White,1988,Mystery,1705139 The Texas Chainsaw Massacre 2,1986,Horror,8025872 Only God Forgives,2013,Drama,778565 The Names of Love,2010,Comedy,513836 Savage Grace,2007,Drama,434417 Police Academy,1984,Comedy,81200000 Four Weddings and a Funeral,1994,Romance,52700832 25th Hour,2002,Drama,13060843 Bound,1996,Thriller,3798532 Requiem for a Dream,2000,Drama,3609278 Tango,1998,Musical,1687311 Donnie Darko,2001,Thriller,727883 Character,1997,Mystery,713413 Spun,2002,Drama,410241 Lady Vengeance,2005,Crime,211667 Mean Machine,2001,Drama,92191 Exiled,2006,Action,49413 After.Life,2009,Horror,108229 One Flew Over the Cuckoo's Nest,1975,Drama,112000000 The Sweeney,2012,Action,26345 Whale Rider,2002,Drama,20772796 Pan,2015,Adventure,34964818 Night Watch,2004,Fantasy,1487477 The Crying Game,1992,Thriller,62549000 Porky's,1981,Comedy,105500000 Survival of the Dead,2009,Horror,101055 Lost in Translation,2003,Drama,44566004 Annie Hall,1977,Romance,39200000 The Greatest Show on Earth,1952,Romance,36000000 Exodus: Gods and Kings,2014,Adventure,65007045 Monster's Ball,2001,Romance,31252964 Maggie,2015,Drama,131175 Leaving Las Vegas,1995,Drama,31968347 The Boy Next Door,2015,Thriller,35385560 The Kids Are All Right,2010,Comedy,20803237 They Live,1988,Thriller,13008928 The Last Exorcism Part II,2013,Horror,15152879 Boyhood,2014,Drama,25359200 Scoop,2006,Comedy,10515579 Planet of the Apes,2001,Adventure,180011740 The Wash,2001,Comedy,10097096 3 Strikes,2000,Comedy,9821335 The Cooler,2003,Romance,8243880 The Night Listener,2006,Mystery,7825820 My Soul to Take,2010,Mystery,14637490 The Orphanage,2007,Thriller,7159147 A Haunted House 2,2014,Comedy,17314483 The Rules of Attraction,2002,Comedy,6525762 Four Rooms,1995,Comedy,4301331 Secretary,2002,Comedy,4046737 The Real Cancun,2003,Documentary,3713002 Talk Radio,1988,Drama,3468572 Waiting for Guffman,1996,Comedy,2892582 Love Stinks,1999,Comedy,2800000 You Kill Me,2007,Crime,2426851 Thumbsucker,2005,Comedy,1325073 
Mirrormask,2005,Adventure,864959 Samsara,2011,Music,2601847 The Barbarians,1987,Adventure,800000 Poolhall Junkies,2002,Drama,562059 The Loss of Sexual Innocence,1999,Drama,399793 Joe,2013,Drama,371897 Shooting Fish,1997,Crime,302204 Prison,1987,Crime,354704 Psycho Beach Party,2000,Mystery,265107 The Big Tease,1999,Comedy,185577 Trust,2010,Crime,58214 An Everlasting Piece,2000,Comedy,75078 Adore,2013,Drama,317125 Mondays in the Sun,2002,Drama,146402 Stake Land,2010,Sci-Fi,18469 The Last Time I Committed Suicide,1997,Drama,12836 Futuro Beach,2014,Drama,20262 Gone with the Wind,1939,War,198655278 Desert Dancer,2014,Drama,143653 Major Dundee,1965,Adventure,14873 Annie Get Your Gun,1950,Romance,8000000 Defendor,2009,Drama,37606 The Pirate,1948,Musical,2956000 The Good Heart,2009,Drama,19959 The History Boys,2006,Comedy,2706659 Unknown,2011,Action,61094903 The Full Monty,1997,Music,45857453 Airplane!,1980,Comedy,83400000 Friday,1995,Drama,27900000 Menace II Society,1993,Drama,27900000 Creepshow 2,1987,Horror,14000000 The Witch,2015,Mystery,25138292 I Got the Hook Up,1998,Comedy,10305534 She's the One,1996,Romance,9449219 Gods and Monsters,1998,Biography,6390032 The Secret in Their Eyes,2009,Mystery,20167424 Evil Dead II,1987,Horror,5923044 Pootie Tang,2001,Musical,3293258 La otra conquista,1998,History,886410 Trollhunter,2010,Horror,252652 Ira & Abby,2006,Romance,220234 The Watch,2012,Sci-Fi,34350553 Winter Passing,2005,Comedy,101228 D.E.B.S.,2004,Romance,96793 March of the Penguins,2005,Documentary,77413017 Margin Call,2011,Biography,5354039 Choke,2008,Drama,2926565 Whiplash,2014,Drama,13092000 City of God,2002,Drama,7563397 Human Traffic,1999,Music,104257 The Hunt,2012,Drama,610968 Bella,2006,Romance,8108247 Maria Full of Grace,2004,Drama,6517198 Beginners,2010,Drama,5776314 Animal House,1978,Comedy,141600000 Goldfinger,1964,Thriller,51100000 Trainspotting,1996,Drama,16501785 The Original Kings of Comedy,2000,Documentary,38168022 Paranormal Activity 
2,2010,Horror,84749884 Waking Ned Devine,1998,Comedy,24788807 Bowling for Columbine,2002,Drama,21244913 A Nightmare on Elm Street 2: Freddy's Revenge,1985,Fantasy,30000000 A Room with a View,1985,Romance,20966644 The Purge,2013,Horror,64423650 Sinister,2012,Horror,48056940 Martin Lawrence Live: Runteldat,2002,Comedy,19184015 Air Bud,1997,Comedy,24629916 Jason Lives: Friday the 13th Part VI,1986,Horror,19472057 The Bridge on the River Kwai,1957,War,27200000 Spaced Invaders,1990,Adventure,15369573 Jason Goes to Hell: The Final Friday,1993,Fantasy,15935068 Dave Chappelle's Block Party,2005,Documentary,11694528 Next Day Air,2009,Comedy,10017041 Phat Girlz,2006,Comedy,7059537 Before Midnight,2013,Romance,8114507 Teen Wolf Too,1987,Fantasy,7888703 Phantasm II,1988,Sci-Fi,7282851 Real Women Have Curves,2002,Comedy,5844929 East Is East,1999,Drama,4170647 Whipped,2000,Comedy,4142507 Kama Sutra: A Tale of Love,1996,Crime,4109095 Warlock: The Armageddon,1993,Fantasy,3902679 8 Heads in a Duffel Bag,1997,Crime,3559990 Thirteen Conversations About One Thing,2001,Drama,3287435 Jawbreaker,1999,Thriller,3071947 Basquiat,1996,Biography,2961991 Tsotsi,2005,Drama,2912363 DysFunktional Family,2003,Comedy,2223990 Tusk,2014,Horror,1821983 Oldboy,2003,Thriller,2181290 Letters to God,2010,Family,2848578 Hobo with a Shotgun,2011,Action,703002 Bachelorette,2012,Romance,418268 Tim and Eric's Billion Dollar Movie,2012,Comedy,200803 The Gambler,2014,Thriller,33631221 Summer Storm,2004,Sport,95016 Chain Letter,2009,Horror,143000 Just Looking,1999,Drama,39852 The Divide,2011,Thriller,22000 Alice in Wonderland,2010,Fantasy,334185206 Cinderella,2015,Fantasy,201148159 Central Station,1998,Drama,5595428 Boynton Beach Club,2005,Romance,3123749 High Tension,2003,Horror,3645438 Hustle & Flow,2005,Crime,22201636 Some Like It Hot,1959,Romance,25000000 Friday the 13th Part VII: The New Blood,1988,Horror,19170001 The Wizard of Oz,1939,Fantasy,22202612 Young Frankenstein,1974,Comedy,86300000 Diary of the 
Dead,2007,Horror,952620 Ulee's Gold,1997,Drama,9054736 Blazing Saddles,1974,Western,119500000 Friday the 13th: The Final Chapter,1984,Thriller,32600000 Maurice,1987,Romance,3130592 The Astronaut's Wife,1999,Thriller,10654581 Timecrimes,2007,Sci-Fi,38108 A Haunted House,2013,Fantasy,40041683 2016: Obama's America,2012,Documentary,33349949 Halloween II,2009,Horror,33386128 That Thing You Do!,1996,Comedy,25809813 Halloween III: Season of the Witch,1982,Mystery,14400000 Kevin Hart: Let Me Explain,2013,Comedy,32230907 My Own Private Idaho,1991,Drama,6401336 Garden State,2004,Comedy,26781723 Before Sunrise,1995,Romance,5400000 Jesus' Son,1999,Drama,1282084 Robot & Frank,2012,Crime,3325638 My Life Without Me,2003,Romance,395592 The Spectacular Now,2013,Comedy,6851969 Religulous,2008,Comedy,12995673 Fuel,2008,Documentary,173783 Dodgeball: A True Underdog Story,2004,Sport,114324072 Eye of the Dolphin,2006,Family,71904 8: The Mormon Proposition,2010,Documentary,99851 The Other End of the Line,2008,Drama,115504 Anatomy,2000,Horror,5725 Sleep Dealer,2008,Thriller,75727 Super,2010,Drama,322157 Get on the Bus,1996,Drama,5731103 Thr3e,2006,Drama,978908 This Is England,2006,Crime,327919 Go for It!,2011,Musical,178739 Friday the 13th Part III,1982,Thriller,36200000 Friday the 13th: A New Beginning,1985,Thriller,21300000 The Last Sin Eater,2007,Drama,379643 The Best Years of Our Lives,1946,Drama,23650000 Elling,2001,Comedy,313436 From Russia with Love,1963,Thriller,24800000 The Toxic Avenger Part II,1989,Comedy,792966 It Follows,2014,Horror,14673301 Mad Max 2: The Road Warrior,1981,Action,9003011 The Legend of Drunken Master,1994,Comedy,11546543 Boys Don't Cry,1999,Crime,11533945 Silent House,2011,Drama,12555230 The Lives of Others,2006,Thriller,11284657 Courageous,2011,Drama,34522221 The Triplets of Belleville,2003,Animation,7002255 Smoke Signals,1998,Comedy,6719300 Before Sunset,2004,Drama,5792822 Amores Perros,2000,Thriller,5383834 Thirteen,2003,Drama,4599680 Winter's 
Bone,2010,Drama,6531491 Me and You and Everyone We Know,2005,Comedy,3885134 We Are Your Friends,2015,Drama,3590010 Harsh Times,2005,Thriller,3335839 Captive,2015,Thriller,2557668 Full Frontal,2002,Romance,2506446 Witchboard,1986,Thriller,7369373 Hamlet,1996,Drama,4414535 Shortbus,2006,Drama,1984378 Waltz with Bashir,2008,Documentary,2283276 "The Book of Mormon Movie, Volume 1: The Journey",2003,Adventure,1098224 The Diary of a Teenage Girl,2015,Drama,1477002 In the Shadow of the Moon,2007,History,1134049 The Virginity Hit,2010,Comedy,535249 House of D,2004,Comedy,371081 Six-String Samurai,1998,Drama,124494 Saint John of Las Vegas,2009,Drama,100669 Stonewall,2015,Drama,186354 London,2005,Drama,12667 Sherrybaby,2006,Drama,198407 Stealing Harvard,2002,Crime,13973532 Gangster's Paradise: Jerusalema,2008,Drama,4958 The Lady from Shanghai,1947,Crime,7927 The Ghastly Love of Johnny X,2012,Comedy,2436 River's Edge,1986,Drama,4600000 Northfork,2003,Drama,1420578 Buried,2010,Drama,1028658 One to Another,2006,Drama,18435 Carrie,2013,Fantasy,35266619 A Nightmare on Elm Street,1984,Horror,26505000 Man on Wire,2008,Crime,2957978 Brotherly Love,2015,Drama,444044 The Last Exorcism,2010,Horror,40990055 El crimen del padre Amaro,2002,Drama,5709616 Beasts of the Southern Wild,2012,Drama,12784397 Songcatcher,2000,Music,3050934 Run Lola Run,1998,Crime,7267324 May,2002,Horror,145540 In the Bedroom,2001,Drama,35918429 I Spit on Your Grave,2010,Horror,92401 "Happy, Texas",1999,Crime,1943649 My Summer of Love,2004,Drama,992238 The Lunchbox,2013,Drama,4231500 Yes,2004,Drama,396035 Caramel,2007,Romance,1060591 Mississippi Mermaid,1969,Drama,26893 I Love Your Work,2003,Mystery,2580 Dawn of the Dead,2004,Thriller,58885635 Waitress,2007,Drama,19067631 Bloodsport,1988,Drama,11806119 The Squid and the Whale,2005,Drama,7362100 Kissing Jessica Stein,2001,Comedy,7022940 Exotica,1994,Romance,5132222 Buffalo '66,1998,Comedy,2365931 Insidious,2010,Horror,53991137 Nine Queens,2000,Drama,1221261 The 
Ballad of Jack and Rose,2005,Drama,712294 The To Do List,2013,Comedy,3447339 Killing Zoe,1993,Thriller,418953 The Believer,2001,Drama,406035 Session 9,2001,Horror,373967 I Want Someone to Eat Cheese With,2006,Romance,194568 Modern Times,1936,Drama,163245 Stolen Summer,2002,Drama,119841 My Name Is Bruce,2007,Fantasy,173066 Pontypool,2008,Fantasy,3478 Trucker,2008,Drama,52166 The Lords of Salem,2012,Drama,1163508 Jack Reacher,2012,Crime,80033643 Snow White and the Seven Dwarfs,1937,Musical,184925485 The Holy Girl,2004,Drama,304124 Incident at Loch Ness,2004,Comedy,36830 "Lock, Stock and Two Smoking Barrels",1998,Crime,3650677 The Celebration,1998,Drama,1647780 Trees Lounge,1996,Drama,695229 Journey from the Fall,2006,Drama,638951 The Basket,1999,Drama,609042 Mercury Rising,1998,Crime,32940507 The Hebrew Hammer,2003,Comedy,19539 Friday the 13th Part 2,1981,Mystery,19100000 "Sex, Lies, and Videotape",1989,Drama,24741700 Saw,2004,Mystery,55153403 Super Troopers,2001,Comedy,18488314 The Day the Earth Stood Still,2008,Sci-Fi,79363785 Monsoon Wedding,2001,Comedy,13876974 You Can Count on Me,2000,Drama,9180275 Lucky Number Slevin,2006,Crime,22494487 But I'm a Cheerleader,1999,Comedy,2199853 Home Run,2013,Sport,2859955 Reservoir Dogs,1992,Crime,2812029 "The Good, the Bad and the Ugly",1966,Western,6100000 The Second Mother,2015,Comedy,375723 Blue Like Jazz,2012,Drama,594904 Down and Out with the Dolls,2001,Music,58936 Airborne,1993,Adventure,2850263 Waiting...,2005,Comedy,16101109 From a Whisper to a Scream,1987,Horror,1400000 Beyond the Black Rainbow,2010,Sci-Fi,56129 The Raid: Redemption,2011,Thriller,4105123 Rocky,1976,Drama,117235247 The Fog,1980,Horror,21378000 Unfriended,2014,Thriller,31537320 The Howling,1981,Horror,17986000 Dr. 
No,1962,Action,16067035 Chernobyl Diaries,2012,Thriller,18112929 Hellraiser,1987,Horror,14564027 God's Not Dead 2,2016,Drama,20773070 Cry_Wolf,2005,Mystery,10042266 Godzilla 2000,1999,Thriller,10037390 Blue Valentine,2010,Romance,9701559 Transamerica,2005,Adventure,9013113 The Devil Inside,2012,Horror,53245055 Beyond the Valley of the Dolls,1970,Music,9000000 The Green Inferno,2013,Horror,7186670 The Sessions,2012,Romance,5997134 Next Stop Wonderland,1998,Romance,3386698 Juno,2007,Comedy,143492840 Frozen River,2008,Drama,2508841 20 Feet from Stardom,2013,Documentary,4946250 Two Girls and a Guy,1997,Drama,1950218 Walking and Talking,1996,Comedy,1277257 The Full Monty,1997,Comedy,45857453 Who Killed the Electric Car?,2006,Documentary,1677838 The Broken Hearts Club: A Romantic Comedy,2000,Sport,1744858 Goosebumps,2015,Horror,80021740 Slam,1998,Drama,982214 Brigham City,2001,Crime,798341 All the Real Girls,2003,Romance,548712 Dream with the Fishes,1997,Drama,464655 Blue Car,2002,Drama,464126 Wristcutters: A Love Story,2006,Drama,104077 The Battle of Shaker Heights,2003,Comedy,279282 The Lovely Bones,2009,Fantasy,43982842 The Act of Killing,2012,Documentary,484221 Taxi to the Dark Side,2007,Crime,274661 Once in a Lifetime: The Extraordinary Story of the New York Cosmos,2006,Sport,144431 Antarctica: A Year on Ice,2013,Biography,287761 Hardflip,2012,Action,96734 The House of the Devil,2009,Horror,100659 The Perfect Host,2010,Comedy,48430 Safe Men,1998,Comedy,21210 The Specials,2000,Comedy,12996 Alone with Her,2006,Crime,10018 Creative Control,2015,Drama,62480 Special,2006,Drama,6387 In Her Line of Fire,2006,Drama,721 The Jimmy Show,2001,Drama,703 Trance,2013,Mystery,2319187 On the Waterfront,1954,Romance,9600000 L!fe Happens,2011,Comedy,20186 "4 Months, 3 Weeks and 2 Days",2007,Drama,1185783 Hard Candy,2005,Thriller,1007962 The Quiet,2005,Drama,381186 Fruitvale Station,2013,Romance,16097842 The Brass Teapot,2012,Fantasy,6643 Snitch,2013,Action,42919096 Latter 
Days,2003,Drama,819939 "For a Good Time, Call...",2012,Comedy,1243961 Time Changer,2002,Fantasy,15278 A Separation,2011,Mystery,7098492 Welcome to the Dollhouse,1995,Comedy,4771000 Ruby in Paradise,1993,Romance,1001437 Raising Victor Vargas,2002,Drama,2073984 Deterrence,1999,Drama,144583 Dead Snow,2009,Comedy,41709 American Graffiti,1973,Drama,115000000 Aqua Teen Hunger Force Colon Movie Film for Theaters,2007,Sci-Fi,5518918 Safety Not Guaranteed,2012,Comedy,4007792 Kill List,2011,Crime,26297 The Innkeepers,2011,Horror,77501 The Unborn,2009,Fantasy,42638165 Interview with the Assassin,2002,Drama,47329 Donkey Punch,2008,Drama,18378 Hoop Dreams,1994,Sport,7830611 King Kong,2005,Action,218051260 House of Wax,2005,Horror,32048809 Half Nelson,2006,Drama,2694973 Top Hat,1935,Musical,3000000 The Blair Witch Project,1999,Horror,140530114 Woodstock,1970,Documentary,13300000 Mercy Streets,2000,Drama,171988 Broken Vessels,1998,Drama,13493 A Hard Day's Night,1964,Musical,515005 Fireproof,2008,Romance,33451479 Benji,1974,Adventure,39552600 Open Water,2003,Drama,30500882 Kingdom of the Spiders,1977,Horror,17000000 The Station Agent,2003,Comedy,5739376 To Save a Life,2009,Drama,3773863 Beyond the Mat,1999,Documentary,2047570 Osama,2003,Drama,1127331 Sholem Aleichem: Laughing in the Darkness,2011,Documentary,906666 Groove,2000,Music,1114943 Twin Falls Idaho,1999,Drama,985341 Mean Creek,2004,Drama,603943 Hurricane Streets,1997,Drama,334041 Never Again,2001,Comedy,295468 Civil Brand,2002,Crime,243347 Lonesome Jim,2005,Comedy,154077 Seven Samurai,1954,Drama,269061 Finishing the Game: The Search for a New Bruce Lee,2007,Comedy,52850 Rubber,2010,Comedy,98017 Home,2015,Adventure,177343675 Kiss the Bride,2007,Romance,31937 The Slaughter Rule,2002,Drama,13134 Monsters,2010,Thriller,237301 Detention of the Dead,2012,Horror,1332 Crossroads,2002,Drama,37188667 Oz the Great and Powerful,2013,Adventure,234903076 Straight Out of Brooklyn,1991,Drama,2712293 Bloody Sunday,2002,History,768045 
Conversations with Other Women,2005,Drama,379122 Poultrygeist: Night of the Chicken Dead,2006,Comedy,23000 42nd Street,1933,Comedy,2300000 Metropolitan,1990,Drama,2938208 Napoleon Dynamite,2004,Comedy,44540956 Blue Ruin,2013,Drama,258113 Paranormal Activity,2007,Horror,107917283 Monty Python and the Holy Grail,1975,Fantasy,1229197 Quinceañera,2006,Drama,1689999 Tarnation,2003,Documentary,592014 The Beyond,1981,Horror,126387 What Happens in Vegas,2008,Comedy,80276912 The Broadway Melody,1929,Musical,2808000 Maniac,2012,Horror,12843 Murderball,2005,Documentary,1523883 American Ninja 2: The Confrontation,1987,Action,4000000 Halloween,1978,Thriller,47000000 Tumbleweeds,1999,Drama,1281176 The Prophecy,1995,Thriller,16115878 When the Cat's Away,1996,Comedy,1652472 Pieces of April,2003,Drama,2360184 Old Joy,2006,Drama,255352 Wendy and Lucy,2008,Drama,856942 Fighting Tommy Riley,2004,Drama,5199 Across the Universe,2007,Musical,24343673 Locker 13,2014,Thriller,2468 Compliance,2012,Crime,318622 Chasing Amy,1997,Comedy,12006514 Lovely & Amazing,2001,Drama,4186931 Better Luck Tomorrow,2002,Romance,3799339 The Incredibly True Adventure of Two Girls in Love,1995,Comedy,1977544 Chuck & Buck,2000,Drama,1050600 American Desi,2001,Comedy,902835 Cube,1997,Mystery,489220 I Married a Strange Person!,1997,Animation,203134 November,2004,Drama,191309 Like Crazy,2011,Romance,3388210 The Canyons,2013,Thriller,49494 Burn,2012,Documentary,111300 Urbania,2000,Drama,1027119 "The Beast from 20,000 Fathoms",1953,Horror,5000000 Swingers,1996,Comedy,4505922 A Fistful of Dollars,1964,Drama,3500000 Side Effects,2013,Drama,32154410 The Trials of Darryl Hunt,2006,Documentary,1111 Children of Heaven,1997,Family,925402 Weekend,2011,Romance,469947 She's Gotta Have It,1986,Comedy,7137502 Another Earth,2011,Romance,1316074 Sweet Sweetback's Baadasssss Song,1971,Thriller,15180000 Tadpole,2000,Romance,2882062 Once,2007,Music,9437933 The Horse Boy,2009,Documentary,155984 The Texas Chain Saw 
Massacre,1974,Horror,30859000 Roger & Me,1989,Documentary,6706368 Facing the Giants,2006,Sport,10174663 The Gallows,2015,Horror,22757819 Hollywood Shuffle,1987,Comedy,5228617 The Lost Skeleton of Cadavra,2001,Horror,110536 Cheap Thrills,2013,Drama,59379 The Last House on the Left,2009,Thriller,32721635 Pi,1998,Thriller,3216970 20 Dates,1998,Comedy,536767 Super Size Me,2004,Comedy,11529368 The FP,2011,Comedy,40557 Happy Christmas,2014,Comedy,30084 The Brothers McMullen,1995,Drama,10246600 Tiny Furniture,2010,Romance,389804 George Washington,2000,Drama,241816 Smiling Fish & Goat on Fire,1999,Comedy,277233 Clerks,1994,Comedy,3151130 In the Company of Men,1997,Comedy,2856622 Sabotage,2014,Action,10499968 Slacker,1991,Drama,1227508 Clean,2004,Romance,136007 The Circle,2000,Drama,673780 Primer,2004,Thriller,424760 El Mariachi,1992,Romance,2040920 My Date with Drew,2004,Documentary,85222 ================================================ FILE: R/inst/tutorials/03-playlist-redux/playlist.R ================================================ library(metaflow) # Use the Metaflow client to retrieve the latest successful run from our # MovieStatsFlow and assign them as data artifacts in this flow. 
# 'start' step: load the movie table and fetch the per-genre statistics
# produced by the latest successful MovieStatsFlow run.
start <- function(self) {
  # Loads the movie data into a data frame
  self$df <- read.csv("./movies.csv", stringsAsFactors = FALSE)

  message("Using metadata provider: ", get_metadata())

  flow <- flow_client$new("MovieStatsFlow")
  run <- run_client$new(flow, flow$latest_successful_run)
  message("Using analysis from: ", run$pathspec)
  self$genre_stats <- run$artifact("stats")
}

# Pick some movies from the genre with highest median gross box office
# which we calculated in MovieStatsFlow
pick_movie <- function(self) {
  sort_order <- order(self$genre_stats$median, decreasing = TRUE)
  sorted_stats <- self$genre_stats[sort_order, ]
  self$picked_genre <- sorted_stats$genres[1]
  message(
    "Picked genre: ", self$picked_genre,
    " with the highest median gross box office."
  )

  # generate a randomized playlist of titles of the picked genre
  # NOTE(review): `self$df$genre` only resolves through `$` partial matching
  # if the CSV column is named `genres` (as it is in genre_stats) -- confirm
  # against the header of movies.csv.
  movie_by_genre <- self$df[self$df$genre == self$picked_genre, ]
  shuffled_rows <- sample(nrow(movie_by_genre))
  self$playlist <- movie_by_genre[shuffled_rows, ]
}

# Print out the picked movies: at most `top_k` titles from the playlist.
end <- function(self) {
  message("Playlist for movies in picked genre: ", self$picked_genre)

  # seq_len() (unlike 1:n) yields no iterations when there is nothing to
  # print, i.e. when the playlist is empty or top_k is not positive.
  n_picks <- max(0, min(nrow(self$playlist), self$top_k))
  for (i in seq_len(n_picks)) {
    message(sprintf("Pick %d: %s", i, self$playlist$movie_title[i]))
  }
}

# Flow definition: start -> pick_movie -> end, with a `top_k` parameter.
metaflow("PlayListReduxFlow") %>%
  parameter("top_k",
            help = "The number of movies to recommend in the playlist.",
            default = 5,
            type = "int") %>%
  step(step = "start",
       r_function = start,
       next_step = "pick_movie") %>%
  step(step = "pick_movie",
       r_function = pick_movie,
       next_step = "end") %>%
  step(step = "end",
       r_function = end) %>%
  run()

================================================
FILE: R/inst/tutorials/04-helloaws/README.md
================================================
# Episode 04-helloaws: Look Mom, We're in the Cloud. **This flow is a simple linear workflow that verifies your AWS configuration. The 'start' and 'end' steps will run locally, while the 'hello' step will run remotely on AWS batch.
After configuring Metaflow to run on AWS, data and metadata about your runs will be stored remotely. This means you can use the client to access information about any flow from anywhere.** #### Showcasing: - AWS batch decorator. - Accessing data artifacts generated remotely in a local notebook. - retry decorator. #### Before playing this episode: 1. Configure your sandbox: https://docs.metaflow.org/metaflow-on-aws/metaflow-sandbox #### To play this episode: ##### Execute the flow: In a terminal: 1. ```cd tutorials/04-helloaws``` 2. ```Rscript helloaws.R run``` If you are using RStudio, you can run this script by directly executing `source("helloaws.R")`. ##### Inspect the results: Open the R Markdown file ```helloaws.Rmd``` in RStudio and execute the markdown cells.
================================================
FILE: R/inst/tutorials/04-helloaws/helloaws.R
================================================
# HelloAWSFlow: a minimal flow in which Metaflow prints 'Hi'.
# Running it validates that Metaflow is installed correctly.

library(metaflow)

# 'start' step: every flow needs a step with this name, and it is the
# first step of the flow. Announces the flow and the metadata provider.
start <- function(self) {
  message("HelloAWS is starting.")
  provider <- get_metadata()
  message("Using metadata provider: ", provider)
}

# 'hello' step: stores a greeting as the `message` artifact, prints it, and
# reports the metadata provider in use.
hello <- function(self) {
  greeting <- "We're on the cloud! Metaflow says: Hi!"
  self$message <- greeting
  print(self$message)
  provider <- get_metadata()
  message("Using metadata provider: ", provider)
}

# This is the 'end' step. All flows must have an 'end' step,
# which is the last step in the flow.
end <- function(self) {
  message("HelloAWS is all done.")
}

# Flow definition: start -> hello -> end. Only the 'hello' step carries the
# retry and batch decorators (2 CPUs, memory = 2048).
metaflow("HelloAWSFlow") %>%
  step(step = "start",
       r_function = start,
       next_step = "hello") %>%
  step(step = "hello",
       decorator("retry", times = 2),
       decorator("batch", cpu = 2, memory = 2048),
       r_function = hello,
       next_step = "end") %>%
  step(step = "end",
       r_function = end) %>%
  run()

================================================
FILE: R/inst/tutorials/04-helloaws/helloaws.Rmd
================================================
---
title: "Episode 04-helloaws: Look Mom, We're in the Cloud"
output: html_notebook
---

In HelloAWSFlow, the 'start' and 'end' steps were run locally, while the 'hello' step was run remotely on AWS batch. Since we are using AWS, data artifacts and metadata were stored remotely. This means you can use the client to access information about any flow from anywhere. This notebook shows you how.

## Import the metaflow client
```{r}
library(metaflow)
message("Current metadata provider: ", get_metadata())
```

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Cmd+Option+I*.

## Print the message generated from the flow
```{r}
flow <- flow_client$new("HelloAWSFlow")
run <- run_client$new(flow, flow$latest_successful_run)
message("Using run: ", run$pathspec)
message(run$artifact("message"))
```

================================================
FILE: R/inst/tutorials/05-statistics-redux/README.md
================================================
# Episode 05-statistics-redux: Computing in the Cloud. **This example revisits 'Episode 02-statistics: Is this Data Science?'. With Metaflow, you don't need to make any code changes to scale-up your flow by running on remote compute. In this example we re-run the 'stats.R' workflow adding the '--with batch' command line argument. This instructs Metaflow to run all your steps on AWS batch without changing any code. You can control the behavior with additional arguments, like '--max-workers'.
For this example, 'max-workers' is used to limit the number of parallel genre-specific statistics computations. You can then access the data artifacts (even the local CSV file) from anywhere because the data is being stored in AWS S3.** #### Showcasing: - ```--with batch``` command line option - ```--max-workers``` command line option - Accessing data artifacts stored in AWS S3 from a local Markdown Notebook. #### Before playing this episode: 1. Configure your sandbox: https://docs.metaflow.org/metaflow-on-aws/metaflow-sandbox #### To play this episode: ##### Execute the flow: In a terminal: 1. ```cd tutorials/02-statistics/``` 2. ```Rscript stats.R --package-suffixes=.R,.csv run --with batch --max-workers 4``` If you are using RStudio, you can replace the last line `run()` with ```R run(batch=TRUE, max_workers=4, package_suffixes=".R,.csv") ``` and run by `source("stats.R")`. ##### Inspect the results: Open the R markdown file ```02-statistics/stats.Rmd``` in your RStudio and re-run the cells. You can access the artifacts stored in AWS S3 from your local RStudio session. ================================================ FILE: R/inst/tutorials/06-worldview/README.md ================================================ # Episode 06-worldview: Way up here. **This episode shows how you can use a notebook to set up a simple dashboard to monitor all of your Metaflow flows.** #### Showcasing: - The metaflow client API. #### Before playing this episode: 1. Configure your sandbox: https://docs.metaflow.org/metaflow-on-aws/metaflow-sandbox #### To play this episode: 1. ```cd tutorials/06-worldview/``` 2. Open ```worldview.Rmd``` in RStudio on your local computer ================================================ FILE: R/inst/tutorials/06-worldview/worldview.Rmd ================================================ --- title: "Episode 06: Way up here." output: html_notebook --- This notebook shows how you can see some basic information about all Metaflow flows that you've run.
## Check metadata provider and your namespace We will be able to see all flows registered with this metadata provider across all namespaces. If you're sharing the AWS metadata provider with your colleagues, you will be able to see all of your colleagues' flows as well. ```{r} suppressPackageStartupMessages(library(metaflow)) message("Current metadata provider: ", get_metadata()) ``` ## List all flows with their latest completion time and status ```{r} set_namespace(NULL) flow_names <- metaflow::list_flows() for (name in unlist(flow_names)){ flow <- flow_client$new(name) run <- run_client$new(flow, flow$latest_run) message("Run id: ", run$id, " Last run: ", run$finished_at, " Successful: ", run$successful) } ``` ## Give some detailed information on HelloAWSFlow ```{r} flow <- flow_client$new("HelloAWSFlow") for (run_id in flow$runs){ run <- run_client$new(flow, run_id) message("Run id: ", run$id, " Successful: ", run$successful) message("Tags: ") print(run$tags) } ``` ================================================ FILE: R/inst/tutorials/07-autopilot/README.md ================================================ # Episode 07-autopilot: Scheduling Compute in the Cloud. **This example revisits 'Episode 05-statistics-redux: Computing in the Cloud'. With Metaflow, you don't need to make any code changes to schedule your flow in the cloud. In this example we will schedule the 'stats.R' workflow using the 'step-functions create' command line argument. This instructs Metaflow to schedule your flow on AWS Step Functions without changing any code. You can execute your flow on AWS Step Functions by using the 'step-functions trigger' command line argument. You can use a notebook to setup a simple dashboard to monitor all of your Metaflow flows.** #### Showcasing: - `step-functions create` command line option - `step-functions trigger` command line option - Accessing data locally or remotely through the Metaflow Client API #### Before playing this episode: 1. 
Configure your sandbox: https://docs.metaflow.org/metaflow-on-aws/metaflow-sandbox #### To play this episode: ##### Execute the flow: In a terminal: 1. ```cd tutorials/02-statistics/``` 2. ```Rscript stats.R --package-suffixes=.R,.csv step-functions create --max-workers 4``` 3. ```Rscript stats.R --package-suffixes=.R,.csv step-functions trigger``` If you are using RStudio, you can replace the last line `run()` by ```R run(package_suffixes=".R,.csv", step_functions="create", max_workers=4) ``` for SFN create, and ```R run(package_suffixes=".R,.csv", step_functions="trigger") ``` for SFN trigger. You can then directly run `source("stats.R")` in RStudio. ##### Inspect the results: Open the R Markdown file ```07-autopilot/autopilot.Rmd``` in your RStudio and re-run the cells. You can access the artifacts stored in AWS S3 from your local RStudio session. ================================================ FILE: R/inst/tutorials/07-autopilot/autopilot.Rmd ================================================ --- title: "Episode 7: Autopilot" output: html_notebook --- **This notebook shows how you can track Metaflow flows that have been scheduled to execute in the cloud.** ## Import the metaflow client ```{r} suppressPackageStartupMessages(library(metaflow)) message("Current metadata provider: ", metaflow::get_metadata()) ``` ## Plot a timeline view of a scheduled run of MovieStatsFlow When you triggered your flow on AWS Step Functions using `step-functions trigger`, you would have seen an output similar to - ```{bash} ... Workflow MovieStatsFlow triggered on AWS Step Functions (run-id sfn-dolor-sit-amet). ... ``` Paste the run-id below (run_id = 'sfn-dolor-sit-amet') and run the following after the run finishes on Step Functions.
```{r} set_namespace(NULL) run = flow_client$new('MovieStatsFlow')$run('sfn-dolor-sit-amet') print(run$steps) ``` ## Steps View ```{r} for (step_name in run$steps){ step = run$step(step_name) step$summary() } ``` ================================================ FILE: R/inst/tutorials/README.md ================================================ # Tutorials for Metaflow R This set of tutorials provides a hands-on introduction to Metaflow. The [basic concepts](https://docs.metaflow.org/v/r/metaflow/basics) are introduced in practice, and you can find out more details about the functionality showcased in these tutorials in Basics of Metaflow and the following sections. ## Setting up Metaflow comes packaged with the tutorials, so getting started is easy. You can pull a copy of the tutorials to your current directory by running the following command in R: ```R metaflow::pull_tutorials() ``` This creates a directory tutorials in your current working directory with a subdirectory for each tutorial. Each tutorial has a brief description and instructions included in the `README.md` in each subfolder. ================================================ FILE: R/man/add_decorators.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/decorators.R \name{add_decorators} \alias{add_decorators} \title{Format a list of decorators as a character vector} \usage{ add_decorators(decorators) } \arguments{ \item{decorators}{List of decorators, as created by the \code{\link{decorator}} function.} } \value{ character vector } \description{ Format a list of decorators as a character vector } \section{Python decorators}{ Metaflow decorators are so called because they translate directly to Python decorators that are applied to a step. So, for example, \code{decorator("batch", cpu = 1)} in R becomes \verb{@batch(cpu = 1)} in Python. 
A new line is appended as well, as Python decorators are placed above the function they take as an input. } \examples{ \dontrun{ add_decorators(list(decorator("batch", cpu = 4), decorator("retry"))) #> c("@batch(cpu=4)", "\n", "@retry", "\n") } } \keyword{internal} ================================================ FILE: R/man/batch.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/decorators-aws.R \name{batch} \alias{batch} \alias{resources} \title{Decorator that configures resources allocated to a step} \usage{ batch( cpu = 1L, gpu = 0L, memory = 4096L, image = NULL, queue = NULL, iam_role = NULL, execution_role = NULL, shared_memory = NULL, max_swap = NULL, swappiness = NULL ) resources(cpu = 1L, gpu = 0L, memory = 4096L, shared_memory = NULL) } \arguments{ \item{cpu}{Integer number of CPUs required for this step. Defaults to \code{1}.} \item{gpu}{Integer number of GPUs required for this step. Defaults to \code{0}.} \item{memory}{Integer memory size (in MB) required for this step. Defaults to \code{4096}.} \item{image}{Character. Specifies the image to use when launching on AWS Batch. If not specified, an appropriate \href{https://hub.docker.com/r/rocker/ml}{Rocker Docker image} will be used.} \item{queue}{Character. Specifies the queue to submit the job to. Defaults to the queue determined by the environment variable "METAFLOW_BATCH_JOB_QUEUE"} \item{iam_role}{Character. IAM role that AWS Batch can use to access Amazon S3. Defaults to the one determined by the environment variable METAFLOW_ECS_S3_ACCESS_IAM_ROLE} \item{execution_role}{Character. IAM role that AWS Batch can use to trigger AWS Fargate tasks. Defaults to the one determined by the environment variable METAFLOW_ECS_FARGATE_EXECUTION_ROLE. See the \href{https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html}{AWS Documentation} for more information.} \item{shared_memory}{Integer. 
The value for the size (in MiB) of the \verb{/dev/shm} volume for this step. This parameter maps to the \code{--shm-size} option to \verb{docker run}.} \item{max_swap}{Integer. The total amount of swap memory (in MiB) a container can use for this step. This parameter is translated to the \code{--memory-swap} option to docker run where the value is the sum of the container memory plus the \code{max_swap} value.} \item{swappiness}{This allows you to tune memory swappiness behavior for this step. A swappiness value of \code{0} causes swapping not to happen unless absolutely necessary. A swappiness value of \code{100} causes pages to be swapped very aggressively. Accepted values are whole numbers between \code{0} and \code{100}.} } \value{ A object of class "decorator" } \description{ These decorators control the resources allocated to step running either locally or on \emph{AWS Batch}. The \code{resources} decorator allocates resources for local execution. However, when a flow is executed with the \code{batch} argument (\verb{run(with = c("batch")}.), it will also control which resources requested from AWS. The \code{batch} decorator instead \emph{forces} the step to be run on \emph{AWS Batch}. See \url{https://docs.metaflow.org/v/r/metaflow/scaling} for more information on how to use these decorators. If both \code{resources} and \code{batch} decorators are provided, the maximum values from all decorators is used. } \examples{ \dontrun{ # This example will generate a large random matrix which takes up roughly # 48GB of memory, and sums the entries. The `batch` decorator forces this # step to run in an environment with 60000MB of memory. 
start <- function(self) { big_matrix <- matrix(rexp(80000*80000), 80000) self$sum <- sum(big_matrix) } end <- function(self) { message( "sum is: ", self$sum ) } metaflow("BigSumFlowR") \%>\% step( batch(memory=60000, cpu=1), step = "start", r_function = start, next_step = "end" ) \%>\% step( step = "end", r_function = end ) \%>\% run() } } ================================================ FILE: R/man/cash-.metaflow.flowspec.FlowSpec.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{$.metaflow.flowspec.FlowSpec} \alias{$.metaflow.flowspec.FlowSpec} \title{Overload getter for self object} \usage{ \method{$}{metaflow.flowspec.FlowSpec}(self, name) } \arguments{ \item{self}{the metaflow self object for each step function} \item{name}{attribute name} } \description{ Overload getter for self object } \section{Usage}{ \preformatted{ print(self$var) } } ================================================ FILE: R/man/cash-set-.metaflow.flowspec.FlowSpec.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{$<-.metaflow.flowspec.FlowSpec} \alias{$<-.metaflow.flowspec.FlowSpec} \title{Overload setter for self object} \usage{ \method{$}{metaflow.flowspec.FlowSpec}(self, name) <- value } \arguments{ \item{self}{the metaflow self object for each step function} \item{name}{attribute name} \item{value}{value to assign to the attribute} } \description{ Overload setter for self object } \section{Usage}{ \preformatted{ self$var <- "hello" } } ================================================ FILE: R/man/catch.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/decorators-errors.R \name{catch} \alias{catch} \title{Decorator that configures a step to catch an error} \usage{ catch(var = NULL, print_exception = TRUE) } \arguments{ 
\item{var}{Character. Name of the artifact in which to store the caught exception. If \code{NULL} (the default), the exception is not stored.} \item{print_exception}{Boolean. Determines whether or not the exception is printed to stdout when caught. Defaults to \code{TRUE}.} } \value{ A object of class "decorator" } \description{ Use this decorator to configure a step to catch any errors that occur during evaluation. For steps that can't be safely retried, it is a good idea to use this decorator along with \code{retry(times = 0)}. See \url{https://docs.metaflow.org/v/r/metaflow/failures#catching-exceptions-with-the-catch-decorator} for more information on how to use this decorator. } \examples{ \donttest{ start <- function(self) { stop("Oh no!") } end <- function(self) { message( "Error is : ", self$start_failed ) } metaflow("AlwaysErrors") \%>\% step( catch(var = "start_failed"), retry(times = 0), step = "start", r_function = start, next_step = "end" ) \%>\% step( step = "end", r_function = end ) \%>\% run() } } ================================================ FILE: R/man/container_image.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{container_image} \alias{container_image} \title{Return the default container image to use for remote execution on AWS Batch. By default we user docker images maintained on https://hub.docker.com/r/rocker/ml.} \usage{ container_image() } \description{ Return the default container image to use for remote execution on AWS Batch. By default we user docker images maintained on https://hub.docker.com/r/rocker/ml. 
} ================================================ FILE: R/man/current.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{current} \alias{current} \title{Helper utility to access current IDs of interest} \usage{ current(value) } \arguments{ \item{value}{one of flow_name, run_id, origin_run_id, step_name, task_id, pathspec, namespace, username, retry_count} } \description{ Helper utility to access current IDs of interest } \examples{ \dontrun{ current("flow_name") } } ================================================ FILE: R/man/decorator.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/decorators.R \name{decorator} \alias{decorator} \title{Metaflow Decorator.} \usage{ decorator(x, ..., .convert_args = TRUE) } \arguments{ \item{x}{Type of decorator (e.g, resources, catch, retry, timeout, batch ...)} \item{...}{Named arguments for the decorator (e.g, \code{cpu=1}, \code{memory=1000}). Note that memory unit is in MB.} \item{.convert_args}{Boolean. If \code{TRUE} (the default), argument values will be converted to analogous Python values, with strings quoted and escaped. Disable this if argument values are already formatted for Python.} } \value{ A object of class "decorator" } \description{ Decorates the \code{step} with the parameters present in its arguments. For this method to work properly, the \code{...} arguments should be named, and decorator type should be the first argument. 
It may be more convenient to use one of the \emph{decorator wrappers} listed below: \itemize{ \item \code{\link{resources}} \item \code{\link{batch}} \item \code{\link{retry}} \item \code{\link{catch}} \item \code{\link{environment_variables}} } } \examples{ \dontrun{ decorator("catch", print_exception=FALSE) decorator("resources", cpu=2, memory=10000) } } ================================================ FILE: R/man/decorator_arguments.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/decorators.R \name{decorator_arguments} \alias{decorator_arguments} \title{Format the arguments of a decorator as inputs to a Python function} \usage{ decorator_arguments(args, .convert_args = TRUE) } \arguments{ \item{args}{Named list of arguments, as would be provided to the \code{...} of a function.} \item{.convert_args}{Boolean. If \code{TRUE} (the default), argument values will be converted to analogous Python values, with strings quoted and escaped. Disable this if argument values are already formatted for Python.} } \value{ atomic character of arguments, separated by a comma } \description{ Format the arguments of a decorator as inputs to a Python function } \section{Python decorators}{ Metaflow decorators are so called because they translate directly to Python decorators that are applied to a step. So, for example, \code{decorator("batch", cpu = 1)} in R becomes \verb{@batch(cpu = 1)} in Python. A new line is appended as well, as Python decorators are placed above the function they take as an input. 
} \examples{ \dontrun{ decorator_arguments(list(cpu = 1, memory = 1000)) #> "cpu=1, memory=1000" } } \keyword{internal} ================================================ FILE: R/man/environment_variables.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/decorators-environment.R \name{environment_variables} \alias{environment_variables} \title{Decorator that sets environment variables during step execution} \usage{ environment_variables(...) } \arguments{ \item{...}{Named environment variables and their values, with all values coercible to a character string.. For example, \code{environment_variables(foo = "bar")} will set the "foo" environment variable as "bar" during step execution.} } \value{ A object of class "decorator" } \description{ Decorator that sets environment variables during step execution } \examples{ \dontrun{ start <- function(self) { print(paste("The cutest animal is the", Sys.getenv("CUTEST_ANIMAL"))) print(paste("The", Sys.getenv("ALSO_CUTE"), "is also cute, though")) } metaflow("EnvironmentVariables") \%>\% step(step="start", environment_variables(CUTEST_ANIMAL = "corgi", ALSO_CUTE = "penguin"), r_function=start, next_step="end") \%>\% step(step="end") \%>\% run() } } ================================================ FILE: R/man/flow_client.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/flow_client.R \docType{class} \name{flow_client} \alias{flow_client} \title{flow_client} \format{ \code{\link{R6Class}} object. } \value{ Object of \code{\link{R6Class}} with fields/methods for introspection. } \description{ An R6 Class representing an existing flow with a certain id. Instances of this class contain all runs related to a flow. 
} \section{Usage}{ \preformatted{ f <- flow_client$new(flow_id) f$id f$tags f$latest_run f$latest_successful_run f$runs f$run(f$latest_run) f$summary() } } \section{Super class}{ \code{\link[metaflow:metaflow_object]{metaflow::metaflow_object}} -> \code{FlowClient} } \section{Active bindings}{ \if{html}{\out{
1. [Rapid local prototyping](https://docs.metaflow.org/metaflow/basics), [support for notebooks](https://docs.metaflow.org/metaflow/managing-flows/notebook-runs), and built-in support for [experiment tracking, versioning](https://docs.metaflow.org/metaflow/client) and [visualization](https://docs.metaflow.org/metaflow/visualizing-results).
2. [Effortlessly scale horizontally and vertically in your cloud](https://docs.metaflow.org/scaling/remote-tasks/introduction), utilizing both CPUs and GPUs, with [fast data access](https://docs.metaflow.org/scaling/data) for running [massive embarrassingly parallel](https://docs.metaflow.org/metaflow/basics#foreach) as well as [gang-scheduled](https://docs.metaflow.org/scaling/remote-tasks/distributed-computing) compute workloads [reliably](https://docs.metaflow.org/scaling/failures) and [efficiently](https://docs.metaflow.org/scaling/checkpoint/introduction).
3. [Easily manage dependencies](https://docs.metaflow.org/scaling/dependencies) and [deploy with one-click](https://docs.metaflow.org/production/introduction) to highly available production orchestrators with built-in support for [reactive orchestration](https://docs.metaflow.org/production/event-triggering).
For full documentation, check out our [API Reference](https://docs.metaflow.org/api) or see our [Release Notes](https://github.com/Netflix/metaflow/releases) for the latest features and improvements.
## Getting started
Getting up and running is easy. If you don't know where to start, [Metaflow sandbox](https://outerbounds.com/sandbox) will have you running and exploring in seconds.
### Installing Metaflow
To install Metaflow in your Python environment from [PyPI](https://pypi.org/project/metaflow/):
```sh
pip install metaflow
```
Alternatively, using [conda-forge](https://anaconda.org/conda-forge/metaflow):
```sh
conda install -c conda-forge metaflow
```
Once installed, a great way to get started is by following our [tutorial](https://docs.metaflow.org/getting-started/tutorials). It walks you through creating and running your first Metaflow flow step by step.
For more details on Metaflow’s features and best practices, check out:
- [How Metaflow works](https://docs.metaflow.org/metaflow/basics)
- [Additional resources](https://docs.metaflow.org/introduction/metaflow-resources)
If you need help, don’t hesitate to reach out on our [Slack community](http://slack.outerbounds.co/)!
### Deploying infrastructure for Metaflow in your cloud
While you can get started with Metaflow easily on your laptop, the main benefits of Metaflow lie in its ability to [scale out to external compute clusters](https://docs.metaflow.org/scaling/remote-tasks/introduction)
and to [deploy to production-grade workflow orchestrators](https://docs.metaflow.org/production/introduction). To benefit from these features, follow this [guide](https://outerbounds.com/engineering/welcome/) to
configure Metaflow and the infrastructure behind it appropriately.
## Get in touch
We'd love to hear from you. Join our community [Slack workspace](http://slack.outerbounds.co/)!
## Contributing
We welcome contributions to Metaflow. Please see our [contribution guide](https://docs.metaflow.org/introduction/contributing-to-metaflow) for more details.
================================================
FILE: SECURITY.md
================================================
# Security Policy
We currently accept reports for vulnerabilities on all published versions of the project.
## Reporting a Vulnerability
You can disclose vulnerabilities securely through the [Netflix Bugcrowd](https://bugcrowd.com/netflix) site. When reporting a finding, mention the project name or repository in the title and the report will find its way to the correct people.
Please note that at the moment, the Metaflow project does not offer a bounty for any disclosure.
================================================
FILE: devtools/Makefile
================================================
# Run every recipe with bash instead of make's default /bin/sh.
SHELL := /bin/bash
# Flags handed to bash for each recipe:
#   -e           abort the recipe on the first failing command
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails
#   -c           read the recipe text from the following argument
.SHELLFLAGS := -eu -o pipefail -c
# First target in the file, so it is the default goal: prints one line per
# user-facing target.
help:
	@printf '%s\n' \
		"Available targets:" \
		" up - Start the development environment" \
		" shell - Switch to development environment's shell" \
		" ui - Open Metaflow UI" \
		" dashboard - Open Minikube dashboard" \
		" down - Stop and clean up the environment" \
		" all-up - Start the development environment with all services" \
		" help - Show this help message"
# Pinned versions of the tools this Makefile downloads.
HELM_VERSION := v3.14.0
MINIKUBE_VERSION := v1.32.0
TILT_VERSION := v0.33.11
GUM_VERSION := v0.15.2

# Absolute path of this Makefile and of the directory that contains it.
# $(dir ...) keeps the trailing slash, so paths below append to MKFILE_DIR
# without adding another separator.
MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
MKFILE_DIR := $(dir $(MKFILE_PATH))

# Everything downloaded or generated lives under .devtools next to the Makefile.
DEVTOOLS_DIR := $(MKFILE_DIR).devtools
PICK_SERVICES := $(MKFILE_DIR)pick_services.sh
MINIKUBE_DIR := $(DEVTOOLS_DIR)/minikube
MINIKUBE := $(MINIKUBE_DIR)/minikube
HELM_DIR := $(DEVTOOLS_DIR)/helm
TILT_DIR := $(DEVTOOLS_DIR)/tilt
TILT := $(TILT_DIR)/tilt
# MKFILE_DIR already ends in "/", so no extra slash here (matches PICK_SERVICES).
TILTFILE := $(MKFILE_DIR)Tiltfile

# Re-invoke make against this exact Makefile regardless of the caller's cwd.
MAKE_CMD := $(MAKE) -f "$(MKFILE_PATH)"

# Minikube cluster sizing and readiness timeout (seconds); each can be
# overridden from the environment or the make command line.
MINIKUBE_CPUS ?= 4
MINIKUBE_MEMORY ?= 6144
MINIKUBE_DISK_SIZE ?= 20g
WAIT_TIMEOUT ?= 300
# Host platform detection used to build download URLs.
# Darwin maps to the macOS artifact names; every other kernel is treated as Linux.
minikube_os := $(if $(filter Darwin,$(shell uname)),darwin,linux)
tilt_os := $(if $(filter Darwin,$(shell uname)),mac,linux)
# x86_64 maps to the amd64/x86_64 artifacts; every other machine type is
# treated as arm64.
arch := $(if $(filter x86_64,$(shell uname -m)),amd64,arm64)
tilt_arch := $(if $(filter x86_64,$(shell uname -m)),x86_64,arm64)
# TODO: Move scripts to a folder
# Install Helm into $(HELM_DIR) when no `helm` is found on PATH.
# The upstream get-helm-3 installer is piped into bash with:
#   - `--version $(HELM_VERSION)` so the pinned release is installed (the
#     installer's documented flag); HELM_INSTALL_VERSION is still exported for
#     backward compatibility, but the installer does not appear to read it
#     - TODO confirm and drop.
#   - USE_SUDO=false and HELM_INSTALL_DIR so the binary lands in the
#     project-local directory without requiring root.
#   - HELM_DIR prepended to PATH for the installer process.
install-helm:
	@if ! command -v helm >/dev/null 2>&1; then \
		echo "📥 Installing Helm $(HELM_VERSION)..."; \
		mkdir -p "$(HELM_DIR)"; \
		curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \
			| HELM_INSTALL_VERSION="$(HELM_VERSION)" \
			  USE_SUDO="false" \
			  PATH="$(HELM_DIR):$$PATH" \
			  HELM_INSTALL_DIR="$(HELM_DIR)" \
			  bash -s -- --version "$(HELM_VERSION)"; \
		chmod +x "$(HELM_DIR)/helm"; \
		echo "✅ Helm installation complete"; \
	else \
		echo "✅ Helm is already installed at $$(command -v helm)"; \
	fi
# Fail fast unless a `docker` CLI is on PATH and it can reach a running daemon.
check-docker:
	@if ! command -v docker >/dev/null 2>&1; then \
		echo "❌ 'docker' CLI not found. Please install a Docker-compatible CLI (e.g., Docker Desktop, OrbStack, Colima, Rancher Desktop) and ensure 'docker' is on your PATH."; \
		exit 1; \
	fi
	@if ! docker info >/dev/null 2>&1; then \
		echo "❌ Cannot connect to Docker daemon. Start your local Docker-compatible engine and check your current Docker context or DOCKER_HOST."; \
		exit 1; \
	fi
	@echo "✅ Docker is ready"
# On macOS only: install Homebrew with the official installer when `brew` is
# not already on PATH. A no-op on every other platform.
install-brew:
	@if [ "$$(uname)" != "Darwin" ] || command -v brew >/dev/null 2>&1; then \
		exit 0; \
	fi; \
	echo "📥 Installing Homebrew..."; \
	/bin/bash -c "$$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"; \
	echo "✅ Homebrew installation complete"
# Install curl when it is missing, using the first available package manager:
# Homebrew on macOS, otherwise apt-get, yum, then dnf. Fails when none applies.
install-curl:
	@if command -v curl >/dev/null 2>&1; then \
		exit 0; \
	fi; \
	echo "📥 Installing curl..."; \
	if [ "$$(uname)" = "Darwin" ]; then \
		HOMEBREW_NO_AUTO_UPDATE=1 brew install curl; \
	elif command -v apt-get >/dev/null 2>&1; then \
		sudo apt-get update && sudo apt-get install -y curl; \
	elif command -v yum >/dev/null 2>&1; then \
		sudo yum install -y curl; \
	elif command -v dnf >/dev/null 2>&1; then \
		sudo dnf install -y curl; \
	else \
		echo "❌ Could not install curl. Please install manually."; \
		exit 1; \
	fi; \
	echo "✅ curl installation complete"
# Install gum when it is missing: via Homebrew on macOS, or from the pinned
# $(GUM_VERSION) .deb release on apt-based systems. Other platforms must
# install it manually.
install-gum:
	@echo "🔍 Checking if gum is installed..."
	@if command -v gum >/dev/null 2>&1; then \
		echo "✅ gum is already installed."; \
		exit 0; \
	fi; \
	echo "📥 Installing gum..."; \
	if [ "$$(uname)" = "Darwin" ]; then \
		HOMEBREW_NO_AUTO_UPDATE=1 brew install gum|| { echo "❌ Failed to install gum via Homebrew"; exit 1; }; \
	elif command -v apt-get >/dev/null 2>&1; then \
		gum_deb=/tmp/gum.deb; \
		curl -fsSL -o "$$gum_deb" \
			"https://github.com/charmbracelet/gum/releases/download/$(GUM_VERSION)/gum_$(GUM_VERSION:v%=%)_$(arch).deb"; \
		sudo apt-get update -qq; \
		sudo apt-get install -y "$$gum_deb" || sudo dpkg -i "$$gum_deb"; \
		rm -f "$$gum_deb"; \
	else \
		echo "❌ Could not determine how to install gum for your platform. Please install manually."; \
		exit 1; \
	fi; \
	echo "✅ gum installation complete"
# Download the pinned Minikube binary into $(MINIKUBE_DIR) if it is not there
# yet, then make sure a cluster is running (Docker driver, sized by the
# MINIKUBE_* variables) with the metrics-server and dashboard addons enabled.
setup-minikube:
	@if [ ! -f "$(MINIKUBE)" ]; then \
		echo "📥 Installing Minikube $(MINIKUBE_VERSION)"; \
		mkdir -p "$(MINIKUBE_DIR)"; \
		curl -L --fail "https://github.com/kubernetes/minikube/releases/download/$(MINIKUBE_VERSION)/minikube-$(minikube_os)-$(arch)" -o "$(MINIKUBE)" \
			|| { rm -f "$(MINIKUBE)"; echo "❌ Failed to download minikube"; exit 1; }; \
		chmod +x "$(MINIKUBE)"; \
		echo "✅ Minikube $(MINIKUBE_VERSION) installed successfully"; \
	fi
	@echo "🔧 Setting up Minikube $(MINIKUBE_VERSION) cluster..."
	@if ! "$(MINIKUBE)" status >/dev/null 2>&1; then \
		echo "🚀 Starting new Minikube $(MINIKUBE_VERSION) cluster..."; \
		"$(MINIKUBE)" start \
			--cpus $(MINIKUBE_CPUS) \
			--memory $(MINIKUBE_MEMORY) \
			--disk-size $(MINIKUBE_DISK_SIZE) \
			--driver docker \
			|| { echo "❌ Failed to start Minikube (check if Docker is running)"; exit 1; }; \
		echo "🔌 Enabling metrics-server and dashboard (quietly)..."; \
		"$(MINIKUBE)" addons enable metrics-server >/dev/null 2>&1; \
		"$(MINIKUBE)" addons enable dashboard >/dev/null 2>&1; \
	else \
		echo "✅ Minikube $(MINIKUBE_VERSION) cluster is already running"; \
	fi
	@echo "🎉 Minikube $(MINIKUBE_VERSION) cluster is ready!"
# Download and unpack the pinned Tilt release into $(TILT_DIR) when the binary
# is not already present.
# curl runs with --fail so an HTTP error aborts the download instead of feeding
# an error page to tar; the recipe shell's pipefail makes the `if` see a curl
# failure even though tar is the last stage of the pipeline.
setup-tilt:
	@if [ ! -f "$(TILT)" ]; then \
		echo "📥 Installing Tilt $(TILT_VERSION)"; \
		mkdir -p "$(TILT_DIR)"; \
		if curl -L --fail "https://github.com/tilt-dev/tilt/releases/download/$(TILT_VERSION)/tilt.$(TILT_VERSION:v%=%).$(tilt_os).$(tilt_arch).tar.gz" \
			| tar -xz -C "$(TILT_DIR)"; then \
			echo "✅ Tilt $(TILT_VERSION) installed successfully"; \
		else \
			echo "❌ Failed to install Tilt"; \
			exit 1; \
		fi; \
	fi
# Run `minikube tunnel` in the foreground using the project-local Minikube binary.
tunnel:
$(MINIKUBE) tunnel
# Stop and delete all Minikube clusters, then remove the downloaded Minikube
# directory. Each command is prefixed with `-` so make continues to the next
# cleanup step even if one of them fails.
teardown-minikube:
@echo "🛑 Stopping Minikube $(MINIKUBE_VERSION) cluster..."
-$(MINIKUBE) stop
@echo "🗑️ Deleting Minikube $(MINIKUBE_VERSION) cluster..."
-$(MINIKUBE) delete --all
@echo "🧹 Removing Minikube binary..."
-rm -rf $(MINIKUBE_DIR)
@echo "✅ Minikube $(MINIKUBE_VERSION) teardown complete"
# Launch the Minikube dashboard via the project-local Minikube binary.
dashboard:
@echo "🔗 Opening Minikube Dashboard..."
@$(MINIKUBE) dashboard
# make shell is symlinked to metaflow-dev shell by metaflow
# Bring up the development environment.
# After the prerequisite targets have run, this recipe writes
# $(DEVTOOLS_DIR)/start.sh line by line, marks it executable and runs it.
# The generated script:
#   - runs with `set -e` and installs traps so INT/TERM exit the script and
#     EXIT runs `kill 0`;
#   - evals `minikube docker-env --shell bash`;
#   - takes the service list from SERVICES_OVERRIDE when it is non-empty,
#     otherwise from the output of $(PICK_SERVICES);
#   - starts `minikube tunnel` in the background;
#   - prints the "Next Steps" banner and runs `tilt up -f $(TILTFILE)` with the
#     Helm, Minikube and Tilt directories prepended to PATH and SERVICES set;
#   - finally `wait`s.
# `$$` in the echo lines is make's escape for a literal `$`, so variables such
# as SERVICES are expanded when start.sh runs, not while it is being generated.
up: install-brew check-docker install-curl install-gum setup-minikube install-helm setup-tilt
@echo "🚀 Starting up (may require sudo access)..."
@mkdir -p $(DEVTOOLS_DIR)
@echo '#!/bin/bash' > $(DEVTOOLS_DIR)/start.sh
@echo 'set -e' >> $(DEVTOOLS_DIR)/start.sh
@echo 'trap "exit" INT TERM' >> $(DEVTOOLS_DIR)/start.sh
@echo 'trap "kill 0" EXIT' >> $(DEVTOOLS_DIR)/start.sh
@echo 'eval $$($(MINIKUBE) docker-env --shell bash)' >> $(DEVTOOLS_DIR)/start.sh
@echo 'if [ -n "$$SERVICES_OVERRIDE" ]; then' >> "$(DEVTOOLS_DIR)/start.sh"
@echo ' echo "🌐 Using user-provided list of services: $$SERVICES_OVERRIDE"' >> "$(DEVTOOLS_DIR)/start.sh"
@echo ' SERVICES="$$SERVICES_OVERRIDE"' >> "$(DEVTOOLS_DIR)/start.sh"
@echo 'else' >> "$(DEVTOOLS_DIR)/start.sh"
@echo ' echo "📝 Selecting services..."' >> "$(DEVTOOLS_DIR)/start.sh"
@echo ' SERVICES=$$($(PICK_SERVICES))' >> "$(DEVTOOLS_DIR)/start.sh"
@echo 'fi' >> "$(DEVTOOLS_DIR)/start.sh"
@echo 'PATH="$(MINIKUBE_DIR):$(TILT_DIR):$$PATH" $(MINIKUBE) tunnel &' >> $(DEVTOOLS_DIR)/start.sh
@echo 'echo -e "🚀 Starting Tilt with selected services..."' >> $(DEVTOOLS_DIR)/start.sh
@echo 'echo -e "\033[1;38;5;46m\n🔥 \033[1;38;5;196mNext Steps:\033[0;38;5;46m Use \033[3mmetaflow-dev shell\033[23m to switch to the development\n environment'\''s shell and start executing your Metaflow flows.\n\033[0m"' >> "$(DEVTOOLS_DIR)/start.sh"
@echo 'PATH="$(HELM_DIR):$(MINIKUBE_DIR):$(TILT_DIR):$$PATH" SERVICES="$$SERVICES" tilt up -f $(TILTFILE)' >> $(DEVTOOLS_DIR)/start.sh
@echo 'wait' >> $(DEVTOOLS_DIR)/start.sh
@chmod +x $(DEVTOOLS_DIR)/start.sh
@$(DEVTOOLS_DIR)/start.sh
# Same as `up`, but sets SERVICES_OVERRIDE=all so the generated start script
# skips the interactive service picker.
all-up:
@echo "🚀 Starting up all services..."
SERVICES_OVERRIDE=all $(MAKE_CMD) up
# Tear the development environment down: kill any running `minikube tunnel`
# process, run teardown-minikube, then delete the Tilt directory and finally
# the whole $(DEVTOOLS_DIR).
# NOTE(review): the "Stopping Tilt..." message is not followed by any Tilt
# command in this recipe - confirm whether Tilt is expected to exit on its own
# once the cluster is deleted, or whether an explicit stop is missing.
down:
@echo "🛑 Stopping all services..."
@-pkill -f "$(MINIKUBE) tunnel" 2>/dev/null || true
@echo "⏹️ Stopping Tilt..."
@echo "🧹 Cleaning up Minikube..."
$(MAKE_CMD) teardown-minikube
@echo "🗑️ Removing Tilt binary and directory..."
-rm -rf $(TILT_DIR)
@echo "🧹 Removing temporary scripts..."
-rm -rf $(DEVTOOLS_DIR)
@echo "✨ All done!"
# Open an interactive shell configured for the development environment.
# Steps:
#   1. Poll `tilt get session` (up to 90 attempts, 2 s apart) until a Tilt
#      session is reachable; fail with guidance otherwise.
#   2. Wait for the `generate-configs` Tilt resource to exist, then block until
#      it reports Ready (300 s timeout).
#   3. Start the user's $SHELL (falling back to make's SHELL) with
#      METAFLOW_HOME pointing at $(DEVTOOLS_DIR) and METAFLOW_PROFILE=local.
#      When $(DEVTOOLS_DIR)/aws_config exists, AWS_PROFILE is also unset,
#      AWS_SHARED_CREDENTIALS_FILE is emptied and AWS_CONFIG_FILE points at
#      that file.
shell: setup-tilt
	@echo "⏳ Checking if development environment is up..."
	@set -eu; \
	for i in $$(seq 1 90); do \
		if "$(TILT)" get session >/dev/null 2>&1; then \
			found_session=1; \
			break; \
		else \
			sleep 2; \
		fi; \
	done; \
	if [ -z "$${found_session:-}" ]; then \
		echo "❌ Development environment is not up."; \
		echo " Please run 'metaflow-dev up' in another terminal, then re-run 'metaflow-dev shell'."; \
		exit 1; \
	fi
	@echo "⏳ Waiting for development environment to be ready..."
	@# Recipes run under `bash -e`, so the exit status must be captured with
	@# `|| status=$$?`; a bare failing command would abort the recipe before the
	@# retry / "not up" branches below could run.
	@while true; do \
		status=0; \
		"$(TILT)" get uiresource generate-configs >/dev/null 2>&1 || status=$$?; \
		if [ $$status -eq 0 ]; then \
			if ! "$(TILT)" wait --for=condition=Ready uiresource/generate-configs --timeout=300s; then \
				echo "❌ Timed out waiting for development environment to be ready."; \
				exit 1; \
			fi; \
			break; \
		elif [ $$status -eq 127 ]; then \
			echo "❌ Development environment is not up."; \
			echo " Please run 'metaflow-dev up' in another terminal, then re-run 'metaflow-dev shell'."; \
			exit 1; \
		else \
			sleep 1; \
		fi; \
	done
	@echo "🔧 Starting a new shell for development environment..."
	@bash -c '\
		if [ -n "$$SHELL" ]; then \
			user_shell="$$SHELL"; \
		else \
			user_shell="$(SHELL)"; \
		fi; \
		echo "🔎 Using $$user_shell for interactive session."; \
		echo "🐍 If you installed Metaflow in a virtual environment, activate it now."; \
		if [ -f "$(DEVTOOLS_DIR)/aws_config" ]; then \
			env -u AWS_PROFILE \
				AWS_SHARED_CREDENTIALS_FILE= \
				METAFLOW_HOME="$(DEVTOOLS_DIR)" \
				METAFLOW_PROFILE=local \
				AWS_CONFIG_FILE="$(DEVTOOLS_DIR)/aws_config" \
				"$$user_shell" -i; \
		else \
			env METAFLOW_HOME="$(DEVTOOLS_DIR)" \
				METAFLOW_PROFILE=local \
				"$$user_shell" -i; \
		fi'
# Block until the development environment is usable, for non-interactive callers.
# Waits (bounded by WAIT_TIMEOUT seconds) for $(DEVTOOLS_DIR)/start.sh to exist,
# then up to 120 s for `tilt get session` to succeed, and finally for the
# "(Tiltfile)" and generate-configs Tilt resources to report Ready.
# Relies on a `timeout` command being available on PATH.
wait-until-ready:
@echo "Waiting for infrastructure to be ready. Timing out in $(WAIT_TIMEOUT) seconds..."
@timeout $(WAIT_TIMEOUT) bash -c 'while [ ! -f $(DEVTOOLS_DIR)/start.sh ]; do sleep 10; done; echo "Infra is Ready"' || (echo "Waiting for infra timed out"&&exit 1)
# buffer to get the tilt api running
@timeout 120 bash -c 'while ! $(TILT) get session; do sleep 3;done'
@echo "Waiting for services to be ready. Timing out in $(WAIT_TIMEOUT) seconds..."
# Need to wait for Tiltfile first, as other resources return 404 otherwise
@$(TILT) wait --for=condition=Ready "uiresource/(Tiltfile)" --timeout=$(WAIT_TIMEOUT)s
@$(TILT) wait --for=condition=Ready uiresource/generate-configs --timeout=$(WAIT_TIMEOUT)s
# @echo '$(MAKE_CMD) create-dev-shell' >> $(DEVTOOLS_DIR)/start.sh
# @echo 'rm -f /tmp/metaflow-devshell-*' >> $(DEVTOOLS_DIR)/start.sh
# Generate a standalone launcher script at /tmp/metaflow-dev-shell-<pid>
# (`$$$$` is make's escape for the shell's `$$`, i.e. the PID of the generating
# bash process) and mark it executable.
# The generated script checks that a Tilt session is reachable, waits up to
# 300 s for the generate-configs resource to become Ready, then starts the
# user's $SHELL (falling back to make's SHELL) interactively with
# METAFLOW_HOME=$(DEVTOOLS_DIR) and METAFLOW_PROFILE=local; when
# $(DEVTOOLS_DIR)/aws_config exists it also sets AWS_CONFIG_FILE to that file
# and empties AWS_SHARED_CREDENTIALS_FILE.
# Paths such as $(TILT) and $(DEVTOOLS_DIR) are expanded by make at generation
# time and baked into the script as absolute paths; `\$$` sequences are left
# for the generated script to expand at run time.
# NOTE(review): unlike the `shell` target, the generated script does not unset
# AWS_PROFILE - confirm whether that difference is intentional.
create-dev-shell: setup-tilt
@bash -c '\
SHELL_PATH=/tmp/metaflow-dev-shell-$$$$ && \
echo "#!/bin/bash" > $$SHELL_PATH && \
echo "set -e" >> $$SHELL_PATH && \
echo "" >> $$SHELL_PATH && \
echo "echo \"⏳ Checking if development environment is up...\"" >> $$SHELL_PATH && \
echo "if ! $(TILT) get session >/dev/null 2>&1; then" >> $$SHELL_PATH && \
echo " echo \"❌ Development environment is not up.\"" >> $$SHELL_PATH && \
echo " echo \" Please run '\''make up'\'' in another terminal, then re-run this script.\"" >> $$SHELL_PATH && \
echo " exit 1" >> $$SHELL_PATH && \
echo "fi" >> $$SHELL_PATH && \
echo "" >> $$SHELL_PATH && \
echo "echo \"⏳ Waiting for development environment to be ready...\"" >> $$SHELL_PATH && \
echo "if ! $(TILT) wait --for=condition=Ready uiresource/generate-configs --timeout=300s; then" >> $$SHELL_PATH && \
echo " echo \"❌ Timed out waiting for development environment to be ready.\"" >> $$SHELL_PATH && \
echo " exit 1" >> $$SHELL_PATH && \
echo "fi" >> $$SHELL_PATH && \
echo "" >> $$SHELL_PATH && \
echo "echo \"🔧 Starting a new shell for development environment...\"" >> $$SHELL_PATH && \
echo "if [ -n \"\$$SHELL\" ]; then" >> $$SHELL_PATH && \
echo " user_shell=\"\$$SHELL\"" >> $$SHELL_PATH && \
echo "else" >> $$SHELL_PATH && \
echo " user_shell=\"$(SHELL)\"" >> $$SHELL_PATH && \
echo "fi" >> $$SHELL_PATH && \
echo "echo \"🔎 Using \$$user_shell for interactive session.\"" >> $$SHELL_PATH && \
echo "echo \"🐍 If you installed Metaflow in a virtual environment, activate it now.\"" >> $$SHELL_PATH && \
echo "if [ -f \"$(DEVTOOLS_DIR)/aws_config\" ]; then" >> $$SHELL_PATH && \
echo " env METAFLOW_HOME=\"$(DEVTOOLS_DIR)\" \\" >> $$SHELL_PATH && \
echo " METAFLOW_PROFILE=local \\" >> $$SHELL_PATH && \
echo " AWS_CONFIG_FILE=\"$(DEVTOOLS_DIR)/aws_config\" \\" >> $$SHELL_PATH && \
echo " AWS_SHARED_CREDENTIALS_FILE= \\" >> $$SHELL_PATH && \
echo " \"\$$user_shell\" -i" >> $$SHELL_PATH && \
echo "else" >> $$SHELL_PATH && \
echo " env METAFLOW_HOME=\"$(DEVTOOLS_DIR)\" \\" >> $$SHELL_PATH && \
echo " METAFLOW_PROFILE=local \\" >> $$SHELL_PATH && \
echo " \"\$$user_shell\" -i" >> $$SHELL_PATH && \
echo "fi" >> $$SHELL_PATH && \
chmod +x $$SHELL_PATH && \
echo "✨ Created $$SHELL_PATH" && \
echo "🔑 Execute it from ANY directory to switch to development environment shell!" \
'
# Open the Metaflow UI (http://localhost:3000) in a browser.
# Steps:
#   1. Fail with guidance unless a Tilt session is reachable.
#   2. Wait for the `metaflow-ui` Tilt resource to exist, then block until it
#      reports Ready.
#   3. Open the URL with `open` (macOS) or `xdg-open` (Linux desktops); when
#      neither is available, print the URL instead of failing.
ui: setup-tilt
	@echo "⏳ Checking if the development environment is up..."
	@if ! "$(TILT)" get session >/dev/null 2>&1; then \
		echo "❌ Development environment is not up."; \
		echo " Please run 'metaflow-dev up' in another terminal, then re-run 'metaflow-dev ui'."; \
		exit 1; \
	fi
	@echo "⏳ Waiting for Metaflow UI to be ready..."
	@# Recipes run under `bash -e`, so the exit status must be captured with
	@# `|| status=$$?`; a bare failing command would abort the recipe before the
	@# retry / "not up" branches below could run.
	@while true; do \
		status=0; \
		"$(TILT)" get uiresource metaflow-ui >/dev/null 2>&1 || status=$$?; \
		if [ $$status -eq 0 ]; then \
			"$(TILT)" wait --for=condition=Ready uiresource/metaflow-ui; \
			break; \
		elif [ $$status -eq 127 ]; then \
			echo "❌ Development environment is not up."; \
			echo " Please run 'metaflow-dev up' in another terminal, then re-run 'metaflow-dev ui'."; \
			exit 1; \
		else \
			sleep 1; \
		fi; \
	done
	@echo "🔗 Opening Metaflow UI at http://localhost:3000"
	@if command -v open >/dev/null 2>&1; then \
		open http://localhost:3000; \
	elif command -v xdg-open >/dev/null 2>&1; then \
		xdg-open http://localhost:3000; \
	else \
		echo "ℹ️ No browser opener found; please visit http://localhost:3000 manually."; \
	fi
# Every target in this Makefile is a command, not a file, so all of them are
# declared phony; otherwise a file or directory with a matching name (e.g.
# `shell` or `ui`) would make the target look up to date.
.PHONY: help up all-up down shell ui dashboard tunnel \
	check-docker install-brew install-curl install-gum install-helm \
	setup-minikube setup-tilt teardown-minikube \
	wait-until-ready create-dev-shell
# Running plain `make` prints the help text.
.DEFAULT_GOAL := help
================================================
FILE: devtools/Tiltfile
================================================
# Tilt configuration for running Metaflow on a local Kubernetes stack
#
# Usage:
# Start the development environment:
# $ tilt up
# Stop and clean up:
# $ tilt down
# TODO:
# 1. move away from temporary images
# 2. introduce kueue and jobsets
# 3. lock versions
# Require a minimum Tilt version and only allow running against the local
# minikube Kubernetes context.
version_settings(constraint='>=0.22.2')
allow_k8s_contexts('minikube')
# Version configuration for components
# Each value can be overridden through the environment variable of the same name.
JOBSET_VERSION = os.getenv("JOBSET_VERSION", "v0.8.2")
# Argo Workflows versions
ARGO_WORKFLOWS_HELM_CHART_VERSION = os.getenv("ARGO_WORKFLOWS_HELM_CHART_VERSION", "0.45.2") # Helm chart version
ARGO_WORKFLOWS_IMAGE_TAG = os.getenv("ARGO_WORKFLOWS_IMAGE_TAG", "v3.6.0") # Argo Workflows application version
# Argo Events versions
ARGO_EVENTS_HELM_CHART_VERSION = os.getenv("ARGO_EVENTS_HELM_CHART_VERSION", "2.4.8") # Helm chart version
ARGO_EVENTS_IMAGE_TAG = os.getenv("ARGO_EVENTS_IMAGE_TAG", "v1.9.2") # Argo Events application version
# Known components, each mapped to a list of other component names.
# NOTE(review): the lists look like per-component dependencies (e.g. "ui" lists
# "postgresql" and "minio") - confirm against the code that consumes this map.
components = {
"metadata-service": ["postgresql"],
"ui": ["postgresql", "minio"],
"minio": [],
"postgresql": [],
"argo-workflows": [],
"argo-events": ["argo-workflows"],
"jobset": [],
}
# Resolve which components were requested through the SERVICES environment
# variable: a comma-separated list of component names, or "all".
# An unset, blank or "all" value selects every known component. Names are
# lower-cased and trimmed, and empty entries are dropped, so values such as
# "minio, ui" or "minio,,ui," resolve to clean component names. A value that
# contains no names at all (e.g. ",") also falls back to every component.
services_env = os.getenv("SERVICES", "all").strip().lower()
if services_env in ("", "all"):
    requested_components = list(components.keys())
else:
    requested_components = [
        name.strip()
        for name in services_env.split(",")
        if name.strip()
    ]
    if not requested_components:
        requested_components = list(components.keys())
# Metaflow settings accumulated while the Tiltfile is evaluated;
# write_config_files() serializes this dict to JSON.
metaflow_config = {}
metaflow_config["METAFLOW_KUBERNETES_NAMESPACE"] = "default"
# NOTE(review): presumably collects lines for the generated AWS config file -
# confirm against the code that appends to and writes this list.
aws_config = []
def write_config_files():
metaflow_json = encode_json(metaflow_config)
cmd = '''cat > .devtools/config_local.json <%s
"%self._text class CustomCard(MetaflowCard): type = "custom_card" HTML = "{data}" def __init__(self, options={"no_header": True}, graph=None, components=[], flow=None, **kwargs): super().__init__() self._no_header = True self._graph = graph if "no_header" in options: self._no_header = options["no_header"] def render(self, task): pt = self._get_mustache() data = '\n'.join([ Title("Title 1").render(), Text("some text comes here").render(), Title("Title 2").render(), Text("some text comes here again").render(), ]) data = dict( data = data ) html_template = self.HTML return pt.render(html_template,data) ``` ### `DefaultCard` The [DefaultCard](../metaflow/plugins/cards/card_modules/basic.py) is a default card exposed by metaflow. This will be used when the `@card` decorator is called without any `type` argument or called with `type='default'` argument. It will also be the default card used with cli. The card uses an [HTML template](../metaflow/plugins/cards/card_modules/base.html) along with a [JS](../metaflow/plugins/cards/card_modules/main.js) and a [CSS](../metaflow/plugins/cards/card_modules/bundle.css) files. The [HTML](../metaflow/plugins/cards/card_modules/base.html) is a template which works with [JS](../metaflow/plugins/cards/card_modules/main.js) and [CSS](../metaflow/plugins/cards/card_modules/bundle.css). The JS and CSS are created after building the JS and CSS from the [cards-ui](../metaflow/plugins/cards/ui/README.md) directory. [cards-ui](../metaflow/plugins/cards/ui/README.md) consists of the JS app that generates the HTML view from a JSON object. ### Default `MetaflowCardComponent` `DefaultCard`/`BlankCard` can be given `MetaflowCardComponent` from `@step` code. The following are the main `MetaflowCardComponent`s available via `metaflow.cards`. - `Artifact` : A component to help log artifacts at task runtime. - Example : `Artifact(some_variable,compress=True)` - `Table` : A component to create a table in the card HTML. 
Consists of convenience methods : - `Table.from_dataframe(df)` to make a table from a dataframe. - `Image` : A component to create an image in the card HTML: - `Image(bytearr,"my Image from bytes")`: to create an image directly from `bytes` - `Image.from_pil_image(pilimage,"From PIL Image")` : to create an image from a `PIL.Image` - `Image.from_matplotlib(plot,"My matplotlib plot")` : to create an image from a plot - `Error` : A wrapper subcomponent to display errors. Accepts an `exception` and a `title` as arguments. - `Markdown` : A component that renders markdown in the HTML template ### Editing `MetaflowCard` from `@step` code `MetaflowCard`s can be edited from `@step` code using the `current.card` interface. The `current.card` interface will only be active when a `@card` decorator is placed over a `@step`. To understand the workings of `current.card` consider the following snippet. ```python @card(type='blank',id='a') @card(type='default') @step def train(self): from metaflow.cards import Markdown from metaflow import current current.card['a'].append(Markdown('# This is present in the blank card with id "a"')) current.card.append(Markdown('# This is present in the default card')) self.t = dict( hi = 1, hello = 2 ) self.next(self.end) ``` In the above scenario there are two `@card` decorators which are being customized by `current.card`. The `current.card.append`/ `current.card['a'].append` methods only accept objects which are subclasses of `MetaflowCardComponent`. The `current.card.append`/ `current.card['a'].append` methods only add a component to **one** card. Since there can be many cards for a `@step`, a **default editable card** is resolved to disambiguate which card has access to the `append`/`extend` methods within the `@step`. A default editable card is a card that will have access to the `current.card.append`/`current.card.extend` methods. `current.card` resolves the default editable card before the `@step` code gets executed. 
It sets the default editable card once the last `@card` decorator calls the `task_pre_step` callback. In the above case, `current.card.append` will add a `Markdown` component to the card of type `default`. `current.card['a'].append` will add the `Markdown` to the `blank` card whose `id` is `a`. A `MetaflowCard` can be user editable, if `ALLOW_USER_COMPONENTS` is set to `True`. Since cards can be of many types, **some cards can also be non-editable by users** (Cards with `ALLOW_USER_COMPONENTS=False`). Those cards won't be eligible to access the `current.card.append`. A non-user editable card can be edited by explicitly setting an `id` and accessing it via `current.card['myid'].append` or by looking it up by its type via `current.card.get(type='pytorch')`. #### `current.card` (`CardComponentCollector`) The `CardComponentCollector` is the object responsible for resolving a `MetaflowCardComponent` to the card referenced in the `@card` decorator. Since there can be many cards, `CardComponentCollector` has a `_finalize` function. The `_finalize` function is called once the **last** `@card` decorator calls `task_pre_step`. The `_finalize` function will try to find the **default editable card** from all the `@card` decorators on the `@step`. The default editable card is the card that can access the `current.card.append`/`current.card.extend` methods. If there are multiple editable cards with no `id` then `current.card` will throw warnings when users call `current.card.append`. This is done because `current.card` cannot resolve which card the component belongs to. The `@card` decorator also exposes another argument called `customize=True`. **Only one `@card` decorator over a `@step` can have `customize=True`**. Since cards can also be added from CLI when running a flow, adding `@card(customize=True)` will set **that particular card** from the decorator as default editable. 
This means that `current.card.append` will append to the card belonging to `@card` with `customize=True`. If there is more than one `@card` decorator with `customize=True` then `current.card` will throw warnings that `append` won't work. One important feature of the `current.card` object is that it will not fail. Even when users try to access `current.card.append` with multiple editable cards, we throw warnings but don't fail. `current.card` will also not fail when a user tries to access a card of a non-existing id via `current.card['mycard']`. Since `current.card['mycard']` gives reference to a `list` of `MetaflowCardComponent`s, `current.card` will return a non-referenced `list` when users try to access the dictionary interface with a nonexistent id (`current.card['my_non_existant_card']`). Once the `@step` completes execution, every `@card` decorator will call `current.card._serialize` (`CardComponentCollector._serialize`) to get a JSON serializable list of `str`/`dict` objects. The `_serialize` function internally calls all [component's](#metaflowcardcomponent) `render` function. This list is `json.dump`ed to a `tempfile` and passed to the `card create` subprocess where the `MetaflowCard` can use them in the final output. ### Creating Custom Installable Cards Custom cards can be installed with the help of the `metaflow_extensions` namespace package. Every `metaflow_extensions` module having custom cards should follow the below directory structure. You can see an example cookie-cutter card over [here](https://github.com/outerbounds/metaflow-card-html). ``` your_package/ # the name of this dir doesn't matter ├ setup.py ├ metaflow_extensions/ │ └ organizationA/ # NO __init__.py file, This is a namespace package. │ └ plugins/ # NO __init__.py file, This is a namespace package. │ └ cards/ # NO __init__.py file, This is a namespace package. │ └ my_card_module/ # Name of card_module │ └ __init__.py. 
# This __init__.py is required to recognize `my_card_module` as a package │ └ somerandomfile.py. # Some file as a part of the package. . ``` The `__init__.py` of `metaflow_extensions.organizationA.plugins.cards.my_card_module` requires a `CARDS` attribute which needs to be a `list` of objects inheriting from the `MetaflowCard` class. For example, the `__init__.py` file below exposes a `MetaflowCard` of `type` "y_card2". ```python from metaflow.cards import MetaflowCard class YCard(MetaflowCard): type = "y_card2" ALLOW_USER_COMPONENTS = True def __init__(self, options={}, components=[], graph=None, flow=None, **kwargs): self._components = components def render(self, task): return "I am Y card %s" % '\n'.join([comp for comp in self._components]) CARDS = [YCard] ``` Having this `metaflow_extensions` module present in the PYTHONPATH can also work. Custom cards can also be created by reusing components provided by metaflow. For example: ```python from metaflow.cards import BlankCard from metaflow.cards import Artifact,Table class MyCustomCard(BlankCard): type = 'my_custom_card' def render(self, task): art_com = [ Table( [[Artifact(k.data,k.id)] for k in task] ).render() ] return super().render(task,components=[art_com]) CARDS = [MyCustomCard] ``` ================================================ FILE: docs/concurrency.md ================================================ # Concurrency in the Metaflow Codebase Here's a definition of concurrency and its sibling concept parallelism: *Concurrency is the composition of independently executing processes, while parallelism is the simultaneous execution of (possibly related) computations* from [a talk by Rob Pike, Concurrency is not Parallelism](https://blog.golang.org/concurrency-is-not-parallelism): **Parallelism** is a relatively straightforward and quantifiable concept. However, it is not always easy to decide what constructs of **concurrency**, which can lead to parallelism, are most appropriate in each context. 
The choice is not easy since besides parallelism and performance, we also want to optimize our code for robustness, observability, maintainability, and readability. This document describes the constructs of concurrency that are used in the Metaflow codebase. If you need to leverage concurrency in the internals of Metaflow, this document should help you to choose the right tool for the job. However, we do **not encourage** you to introduce concurrency unless it is clearly necessary. It is much easier to write simple, readable, and robust non-concurrent code compared to anything concurrent. [Make it work, make it right, make it fast](http://wiki.c2.com/?MakeItWorkMakeItRightMakeItFast). Concurrency is practically never needed during the first two phases. ## Vocabulary We divide the concurrency constructs into two categories: Primary and Secondary. Whenever possible, you should prefer the constructs in the first category. The patterns are well established and have been used successfully in the core Metaflow modules, `runtime.py` and `task.py`. The constructs in the second category can be used in subprocesses, outside the core code paths in `runtime.py` and `task.py`. The reasons for this are elaborated below. In this document, we call an atomic unit of concurrent execution **a task**. A task is an operation that we want to execute concurrently with other operations. In this sense, tasks are equivalent to [`asyncio.Task`s in Python](https://docs.python.org/3/library/asyncio-task.html#asyncio.Task), [Goroutines in Go](https://tour.golang.org/concurrency/1), and [Processes in Erlang](https://erlangbyexample.org/processes). Coincidentally, Metaflow `Task`s run by `task.py` are also tasks in this sense but we have also many other internal tasks in Metaflow besides the `Task` that executes the user code. For a quick overview, see the [summary](#summary) below. 
## Primary Constructs for Concurrency These patterns power the core Metaflow functionality in `runtime.py` and `task.py`. They are also fully observable: You can easily see what concurrent tasks are running, and you can re-launch individual tasks for testing and reproduction of issues. ### 1. Subprocesses for subcommands Metaflow uses its own CLI to execute tasks as subprocesses. There are two main benefits of this approach: 1. Subprocesses are fully isolated from the parent process, so they can execute arbitrary user code. Besides intentionally malicious code and resource exhaustion, there is no way for the child process to crash the parent, which is critically important for Metaflow. 2. Subprocesses can be launched by different parents easily, thanks to the standard CLI "API". We leverage this feature to launch subprocesses on Titus and via Meson. #### Example Uses The subcommand `step` is used to execute individual Metaflow tasks. This subcommand is also used to clone many datastores concurrently during `resume`. These subprocesses are managed by `runtime.Worker`. #### How to Observe Set the environment variable `METAFLOW_DEBUG_SUBCOMMAND=1` to see the exact command line that is used to launch a subcommand task. You can re-execute the task simply by re-executing the command line manually. However, be careful when re-executing commands from real runs, as you will rewrite data in the datastore. To be safe, preferably rerun only commands executed with `--datastore=local` and `--metadata=local`. You can observe running subprocesses with `ps` and attach to them using `gdb` as usual. Or you can kill them e.g. with `kill -9`. #### Intended Use Cases Subcommands work best if there is very limited communication between the parent and the child process. No message passing between the processes is supported currently. ### 2. 
Sidecars Sidecars were introduced to address the need to execute internal tasks in parallel with scheduling in `runtime.py` or during the execution of user code in `task.py`. Especially in the latter case the user code may block the Python interpreter for an arbitrary amount of time, so there isn't a safe way to execute internal tasks in the same interpreter. As a solution, we use child processes to host these tasks, aka sidecars. The lifetime of a sidecar is bound to the lifetime of its parent process. In contrast to subcommands, there is a one-way, lossy, communication channel from the parent to the sidecar. Sidecar implementations are expected to consume messages from the parent without delay, to keep the parent from blocking. The sidecar subprocess may die for various reasons, in which case messages sent to it by the parent may be lost. To keep communication essentially non-blocking and fast, there is no blocking acknowledgement of successful message processing by the sidecar. Hence the communication is lossy. In this sense, communication with a sidecar is more akin to UDP than TCP. #### Example Uses We send heartbeats to the metadata service in a sidecar, `heartbeat.py`, to detect whether the task is alive. Since heartbeats are purely informational, we didn't want to increase the latency of the main process due to these service calls, nor did we want to fail the whole parent process in case of a request failing. A sidecar that handles communication with the metadata service was a perfect solution. #### How to Observe Set the environment variable `METAFLOW_DEBUG_SIDECAR=1` to see the commands used to launch sidecars. You can send messages to the sidecar via `stdin`. However, be mindful about not polluting production systems with test data when testing sidecars. You can observe running sidecars with `ps` and attach to them using `gdb` as usual. Or you can kill them e.g. with `kill -9`. 
#### Intended Use Cases Use a sidecar if you need a task that runs during scheduling or execution of user code. A sidecar task can not perform any critical operations that must succeed in order for a task or a run to be considered valid. This makes sidecars suitable only for opportunistic, best-effort tasks. ### 3. Data Parallelism Many use cases of concurrency are related to IO: we want to load or store N objects in parallel. Instead of hiding data parallelism in generic constructs of concurrency, e.g. a thread pool, we can leverage specific constructs optimized for this use case. In the case of Metaflow, data parallelism is most often related to Amazon S3 which is our main `datastore`. Luckily, Metaflow comes with [a built-in S3 client](https://docs.metaflow.org/metaflow/data#data-in-s-3-metaflow-s3) that provides methods like `get_many` that handle concurrency automatically. #### Example Uses The `MetaflowDatastoreSet` class represents a set of datastores which can be loaded concurrently. Using this class instead of loading each `Datastore` sequentially has yielded a significant performance boost in `resume` and normal task execution. #### How to Observe Set the environment variable `METAFLOW_DEBUG_S3CLIENT=1` to see the commands used to interact with S3 through the built-in client. Note that this setting will also persist temporary control files passed to the client, to make it easier to reproduce and observe the client's behavior. However, you will need to clean up the temporary files, prefixed with `metaflow.s3`, manually. The client uses a CLI of `s3op.py` internally, which you can test with ``` python -m metaflow.datatools.s3op ``` You can observe running S3 operations with `ps` and attach to them using `gdb` as usual. Or you can kill them e.g. with `kill -9`. #### Intended Use Cases Use data parallelism provided by `S3.get_many` / `S3.put_many` when you need to perform multiple S3 operations. 
S3 really shines at providing maximum performance for a high number of parallel operations. ## Secondary Constructs for Concurrency The following constructs can be used in sidecars and other subprocesses of Metaflow. They are not well-suited for being used in `runtime.py` and `task.py` directly, as explained below. ### 4. Threads The internal state of the Python interpreter is guarded by [the Global Interpreter Lock, or GIL](https://wiki.python.org/moin/GlobalInterpreterLock). The main effect of the GIL is that in most cases two distinct threads executing Python can't run in parallel, which limits the usefulness of threads in Python. Even if this wasn't the case, [threads are hard to use correctly](https://www.google.com/search?q=threads+are+evil). However, as a construct of concurrency, if not parallelism, threads have some uses. The main upside of threads is that communication between tasks is very easy and practically zero-cost. #### Example Uses Many sidecars, e.g. `heartbeat.py`, use a separate worker thread to make sure that the main process consuming messages from the parent will not block for an extended amount of time. ### 5. Multiprocessing The `multiprocessing` module in Python is a (thick) layer of abstraction over subprocesses. The main upside of `multiprocessing` is that it is not limited by the Global Interpreter Lock, so it can leverage multi-process/multi-core parallelism. The main downside of `multiprocessing` is that it tries to provide a very high-level abstraction over processes, which is surprisingly hard to do well. For this reason, historically, the implementation has not been bug-free. Even though the implementation has improved over time, it has still rough edges: e.g. messages need to be picklable, their sizes are limited, called functions need to be at the top level of the module etc. Also, debugging `multiprocessing` code can be hard compared to plain subprocesses. 
Use `multiprocessing` in your subprocesses if you absolutely need one of the advanced constructs, such as a multi-consumer `Queue`. For simple use cases, simple subprocesses are almost always a better choice. #### Example Uses The Metaflow S3 client, `s3op.py`, uses `multiprocessing` internally to manage its internal worker processes. ### 6. `parallel_map` A close cousin of `multiprocessing` is [`metaflow.parallel_map`](https://docs.metaflow.org/metaflow/scaling#parallelization-over-multiple-cores). In contrast to `multiprocessing`, child processes are simply `fork`'ed instead of executed as subprocesses. The main upside of this approach is that passing data, including the function defining the task, has no limitations and only a negligible cost, since no serialization is involved. However, passing data back to the parent involves pickling, similar to `multiprocessing`. Moreover, [the semantics of `fork` can be finicky](https://codewithoutrules.com/2018/09/04/python-multiprocessing/). For this reason, we want to avoid using `parallel_map` in the core Metaflow. ### 7. Async Python 3 introduced [asynchronous programming as a first-class citizen in Python](https://docs.python.org/3/library/asyncio.html). At its core, `asyncio` is a scheduler for cooperative multitasking. The main upside of `asyncio` is that it makes concurrency very explicit: the code can include explicit `Task` objects that yield (`await`) control to other tasks when they see fit. This style of concurrency is particularly well suited for IO-bound network programming, e.g. web servers, which need to execute many request handler tasks concurrently, more so than in parallel. The downsides of `asyncio` are many: - `asyncio` is not available in Python 2 and its standard library implementation has been quickly evolving at least until Python 3.6. This makes it practically unusable in Metaflow, which needs to support Python 2 and earlier versions of Python 3. 
- `asyncio` requires a lot of attention from the programmer. It is very easy to introduce issues that tank the performance (e.g. a single blocking function call), produce extremely hard to debug bugs (e.g. forget to catch an exception), and/or random deadlocks (e.g. wait on a shared resource). - By default, `asyncio` is useless for CPU-bound tasks. It needs to rely on a thread- or a process-pool to achieve CPU-parallelism. One could use a thread or a process-pool directly and avoid many pitfalls of `asyncio`. `asyncio` has its uses in servers outside Metaflow. Currently it is not suitable to be used in the core Metaflow. ## Summary The table below summarizes the discussion. We focus on comparing four key features of the concurrency constructs: - **Arbitrary code** - does the construct provide enough isolation that it can be used to execute arbitrary, user-defined, Python-code safely. - **Return data** - does the construct allow returning data to the caller after the task has finished. - **Message passing** - does the construct support communication between tasks during the execution of tasks. - **Observable** - is it possible to observe what tasks are running and re-execute individual tasks easily, e.g. to reproduce issues. ``` Construct Arbitrary code Return data Message passing Observable PRIMARY Subprocesses yes partial(1) no yes Sidecars partial(2) no partial(3) partial(4) Data Parallelism no yes no yes SECONDARY Threads no yes yes no Multiprocessing yes partial(5) partial(5) no parallel_map partial(6) partial(7) no no Async no yes yes no ``` 1. We record only the exit code of a subprocess. Data can not be returned directly. 2. Sidecars need to be well-behaving: they need to consume messages from the parent without delay. 3. Sidecars support only lossy, one-way message passing from the parent to the sidecar. 4. In contrast to subprocesses and data parallelism, the command line does not provide sufficient information to reconstruct the exact state of a sidecar. 
This would require replaying of all messages sent to the sidecar. 5. Values communicated via `multiprocessing` need to be picklable. There are other limitations and issues related to the `Queue` object, which is used to facilitate communication. 6. Due to finicky semantics of `fork`, the child process is only partially isolated from the parent which makes `parallel_map` a bad candidate for execution of arbitrary code. 7. Values returned by `parallel_map` need to be picklable. ================================================ FILE: docs/datastore.md ================================================ # Datastore design ## Motivation The datastore is a crucial part of the Metaflow architecture and deals with storing and retrieving data, be they artifacts (data produced or consumed within user steps), logs, metadata information used by Metaflow itself to track execution or other data like code packages. One of the key benefits of Metaflow is the ease with which users can access the data; it is made available to steps of a flow that need it and users can access it using the Metaflow client API. This documentation provides a brief overview of Metaflow's datastore implementation and points out ways in which it can be extended to support, for example, other storage systems (like GCS instead of S3). ## High-level design ### Design principles A few principles were followed in designing this datastore. They are listed here for reference and to help explain some of the choices made. #### Backward compatibility The new datastore should be able to read and interact with data stored using an older implementation of the datastore. While we do not guarantee forward compatibility, currently, older datastores should be able to read most of the data stored using the newer datastore. #### Batch operations Where possible, APIs are batch friendly and should be used that way. 
In other words, it is typically more efficient to call an API once, passing it all the items to operate on (for example, all the keys to fetch) than to call the same API multiple times with a single key at a time. All APIs are designed with batch processing in mind where it makes sense. #### Separation of responsibilities Each class implements few functionalities, and we attempted to maximize reuse. The idea is that this will also help in developing newer implementations going forward and being able to surgically change a few things while keeping most of the code the same. ### Storage structure Before going into the design of the datastore itself, it is worth considering **where** Metaflow stores its information. Note that, in this section, the term `directory` can also refer to a `prefix` in S3 for example. Metaflow considers a datastore to have a `datastore_root` which is the base directory of the datastore. Within that directory, Metaflow will create multiple subdirectories, one per flow (identified by the name of the flow). Within each of those directories, Metaflow will create one directory per run as well as a `data` directory which will contain all the artifacts ever produced by that flow. The datastore has several components (starting at the lowest-level): - a `DataStoreStorage` which abstracts away a storage system (like S3 or the local filesystem). This provides very simple methods to read and write bytes, obtain metadata about a file, list a directory as well as minor path manipulation routines. Metaflow provides sample S3 and local filesystem implementations. When implementing a new backend, you should only need to implement the methods defined in `DataStoreStorage` to integrate with the rest of the Metaflow datastore implementation. - a `ContentAddressedStore` which implements a thin layer on top of a `DataStoreStorage` to allow the storing of byte blobs in a content-addressable manner. 
In other words, for each `ContentAddressedStore`, identical objects are stored once and only once, thereby providing some measure of de-duplication. This class includes the determination of what content is the same or not as well as any additional encoding/compressing prior to storing the blob in the `DataStoreStorage`. You can extend this class by providing alternate methods of packing and unpacking the blob into bytes to be saved. - a `TaskDataStore` is the main interface through which the rest of Metaflow interfaces with the datastore. It includes functions around artifacts ( `persisting` (saving) artifacts, loading (getting)), logs and metadata. - a `FlowDataStore` ties everything together. A `FlowDataStore` will include a `ContentAddressedStore` and all the `TaskDataStore`s for all the tasks that are part of the flow. The `FlowDataStore` includes functions to find the `TaskDataStore` for a given task as well as to save and load data directly ( this is used primarily for data that is not tied to a single task, for example code packages which are more tied to runs). From the above description, you can see that there is one `ContentAddressedStore` per flow so artifacts are de-duplicated *per flow* but not across all flows. ## Implementation details In this section, we will describe each individual class mentioned above in more detail ### `DataStoreStorage` class This class implements low-level operations directly interacting with the file-system (or other storage system such as S3). It exposes a file and directory like abstraction (with functions such as `path_join`, `path_split`, `basename`, `dirname` and `is_file`). Files manipulated at this level are byte objects; the two main functions `save_bytes` and `load_bytes` operate at the byte level. Additional metadata to save alongside the file can also be provided as a dictionary. The backend does not parse or interpret this metadata in any way and simply stores and retrieves it. 
The `load_bytes` has a particularity in the sense that it returns an object `CloseAfterUse` which must be used in a `with` statement. Any bytes loaded will not be accessible after the `with` statement terminates and so must be used or copied elsewhere prior to termination of the `with` scope. ### `ContentAddressedStore` class The content addressed store also handles content as bytes but performs two additional operations: - de-duplicates data based on the content of the data (in other words, two identical blobs of data will only be stored once) - transforms the data prior to storing; we currently only compress the data but other operations are possible. Data is always de-duplicated, but you can choose to skip the transformation step by telling the content address store that the data should be stored `raw` (ie: with no transformation). Note that the de-duplication logic happens *prior* to any transformation (so the transformation itself will not impact the de-duplication logic). Content stored by the content addressed store is addressable using a `key` which is returned when `save_blobs` is called. `raw` objects can also directly be accessed using a `uri` (also returned by `save_blobs`); the `uri` will point to the location of the `raw` bytes in the underlying `DataStoreStorage` (so, for example, a local filesystem path or a S3 path). Objects that are not `raw` do not return a `uri` as they should only be accessed through the content addressed store. The symmetrical function to `save_blobs` is `load_blobs` which takes a list of keys (returned by `save_blobs`) and loads all the objects requested. Note that at this level of abstraction, there is no `metadata` for the blobs; other mechanisms exist to store, for example, task metadata or information about artifacts. #### Implementation detail The content addressed store contains several (well currently only a pair) of functions named `_pack_vX` and `_unpack_vX`. 
They effectively correspond to the transformations (both transformation to store and reverse transformation to load) the data undergoes prior to being stored. The `X` corresponds to the version of the transformation allowing new transformations to be added easily. A backward compatible `_unpack_backward_compatible` method also allows this datastore to read any data that was stored with a previous version of the datastore. Note that going forward, if a new datastore implements `_pack_v2` and `_unpack_v2`, this datastore would not be able to unpack things packed with `_pack_v2` but would throw a clear error as to what is happening. ### `TaskDataStore` class This is the meatiest class and contains most of the functionality that an executing task will use. The `TaskDataStore` is also used when accessing information and artifacts through the Metaflow Client. #### Overview At a high level, the `TaskDataStore` is responsible for: - storing artifacts (functions like `save_artifacts`, `persist` help with this) - storing other metadata about the task execution; this can include logs, general information about the task, user-level metadata and any other information the user wishes to persist about the task. Functions for this include `save_logs` and `save_metadata`. Internally, functions like `done` will also store information about the task. Artifacts are stored using the `ContentAddressedStore` that is common to all tasks in a flow; all other data and metadata is stored using the `DataStoreStorage` directly at a location indicated by the `pathspec` of the task. #### Saving artifacts To save artifacts, the `TaskDataStore` will first pickle the artifacts, thereby transforming a Python object into bytes. Those bytes will then be passed down to the `ContentAddressedStore`. 
In other words, in terms of data transformation: - Initially you have a pickle-able Python object - `TaskDataStore` pickles it and transforms it to `bytes` - Those `bytes` are then de-duped by the `ContentAddressedStore` - The `ContentAddressedStore` will also gzip the `bytes` and store them in the storage backend. Crucially, the `TaskDataStore` takes (and returns when loading artifacts) Python objects whereas the `ContentAddressedStore` only operates with bytes. #### Saving metadata and logs Metadata and logs are stored directly as files using the `DataStoreStorage` to create and write to a file. The name of the file is something that `TaskDataStore` determines internally. ### `FlowDataStore` class The `FlowDataStore` class doesn't do much except give access to `TaskDataStore` (in effect, it creates the `TaskDataStore` objects to use) and also allows files to be stored in the `ContentAddressedStore` directly. This is used to store, for example, code packages. Files stored using the `save_data` method are stored in `raw` format (as in, they are not further compressed). They will, however, still be de-duped. ### Caching The datastore allows the inclusion of caching at the `ContentAddressedStore` level: - for blobs (basically the objects returned by `load_blobs` in the `ContentAddressedStore`). Objects in this cache have gone through: reading from the backend storage system and the data transformations in `ContentAddressedStore`. The datastore does not determine how and where to cache the data and simply calls the functions `load_key` and `store_key` on a cache configured by the user using `set_blob_cache`. `load_key` is expected to return the object in the cache (if present) or None otherwise. `store_key` takes a key (the one passed to `load`) and the object to store. The outside cache is free to implement its own policies and/or own behavior for the `load_key` and `store_key` functions. 
As an example, the `FileCache` uses the `blob_cache` construct to write to a file anything passed to `store_key` and returns it by reading from the file when `load_key` is called. The persistence of the file is controlled by the `FileCache` so an artifact `store_key`ed may vanish from the cache and would be re-downloaded by the datastore when needed (and then added to the cache again). ================================================ FILE: docs/lifecycle.dot ================================================ digraph Metaflow { /* LEGEND palegreen2: environment lightblue2: decorator tan: command lightgoldenrod1: metadata lightpink2: function call grey78: event / change in control */ graph [fontsize=10, fontname="Noto Mono"] node [width=2.5, height=1, shape=record, fontname="Noto Mono", style=filled] edge [fontname="Nimbus Mono L"] subgraph cluster_init { label="Initialization" labeljust=l fontsize=14 validate_env [label="{environment|validate_environment}", fillcolor=palegreen2] flow_init [label="{decorator|flow_init}", fillcolor=lightblue2] step_init [label="{decorator|step_init}", fillcolor=lightblue2] choose_command [shape="circle", label="Choose\nCommand", width=1, fillcolor=grey78] } subgraph cluster_package { label="Code Package" labeljust=l fontsize=14 validate_dag [label="{graph|validate}", fillcolor=lightpink2] init_environment [label="{environment|init_environment}", fillcolor=palegreen2] package_init [label="{decorator|package_init}", fillcolor=lightblue2] add_custom_package [label="{decorator|add_to_package}", fillcolor=lightblue2] add_to_package [label="{environment|add_to_package}", fillcolor=palegreen2] package [label="{package|create}", fillcolor=lightpink2] } subgraph cluster_local_run { label="Local Run" labeljust=l fontsize=14 command_run [label="{command|run}", fillcolor=tan] new_run_id [label="{metadata|new_run_id}", fillcolor=lightgoldenrod1] runtime_init [label="{decorator|runtime_init}", fillcolor=lightblue2] local_params 
[label="{runtime|persist_constants}", fillcolor=lightpink2] start_run_heartbeat [label="{metadata|start_run_heartbeat}", fillcolor=lightgoldenrod1] schedule_local_task [shape="circle", label="Schedule\nTask", width=1, fillcolor=grey78] runtime_finished [label="{decorator|runtime_finished}", fillcolor=lightblue2] stop_run_heartbeat [label="{metadata|stop_run_heartbeat}", fillcolor=lightgoldenrod1] } subgraph cluster_init_deuce { label="Initialization" labeljust=l fontsize=14 validate_env_deuce [label="{environment|validate_environment}", fillcolor=palegreen2] flow_init_deuce [label="{decorator|flow_init}", fillcolor=lightblue2] step_init_deuce [label="{decorator|step_init}", fillcolor=lightblue2] choose_command_deuce [shape="circle", label="Choose\nCommand", width=1, fillcolor=grey78] } subgraph cluster_stepfunctions_deploy { label="Deploy to AWS Step Functions" labeljust=l fontsize=14 stepfunctions_create [label="{command|step-functions create}", fillcolor=tan] push_to_stepfunctions [shape="circle", label="Push to AWS\nStep Functions", width=1, fillcolor=grey78] } subgraph cluster_batch { label="Launch on AWS Batch" labeljust=l fontsize=14 batch_step [label="{command|batch step}", fillcolor=tan] launch_batch [label="{AWS Batch|launch_job}", fillcolor=lightpink2] local_bootstrap_batch [shape="circle", label="Bootstrap\nAWS Batch", width=1, fillcolor=grey78] } subgraph cluster_stepfunctions_run { label="AWS Step Functions Trigger" labeljust=l fontsize=14 stepfunctions_trigger [label="{command|step-functions trigger}", fillcolor=tan] stepfunctions_run [label="{AWS Step Functions|start_execution}", fillcolor=lightpink2] stepfunctions_bootstrap_batch [shape="circle", label="Bootstrap\nAWS Batch", width=1, fillcolor=grey78] stepfunctions_init [label="{command|init}" fillcolor=tan] stepfunctions_params [label="{runtime|persist_constants}", fillcolor=lightpink2] stepfunctions_task [shape="circle", label="Execute\nTask", width=1, fillcolor=grey78] } subgraph 
cluster_local_task { label="Initialize Local Task" labeljust=l fontsize=14 new_local_task [label="{metadata|new_task_id}", fillcolor=lightgoldenrod1] runtime_task_created [label="{decorator|runtime_task_created}", fillcolor=lightblue2] runtime_step_cli [label="{decorator|runtime_step_cli}", fillcolor=lightblue2] launch_local [shape="circle", label="Execute\nTask", width=1, fillcolor=grey78] } subgraph cluster_task { label="Execute Task" labeljust=l fontsize=14 task_entry [label="{command|step}" fillcolor=tan] register_run [label="{metadata|register_run_id}", fillcolor=lightgoldenrod1] register_task [label="{metadata|register_task_id}", fillcolor=lightgoldenrod1] start_task_heartbeat [label="{metadata|start_task_heartbeat}", fillcolor=lightgoldenrod1] task_pre_step [label="{decorator|task_pre_step}", fillcolor=lightblue2] task_decorate [label="{decorator|task_decorate}", fillcolor=lightblue2] user_code [shape="circle", label="Execute\nUser Code", width=1, fillcolor=grey78] task_post_step [label="{decorator|task_post_step}", fillcolor=lightblue2] task_exception [label="{decorator|task_exception}", fillcolor=lightblue2] persist_artifacts [label="{datastore|persist}", fillcolor=lightpink2] stop_task_heartbeat [label="{metadata|stop_task_heartbeat}", fillcolor=lightgoldenrod1] register_artifacts [label="{metadata|register_artifacts}", fillcolor=lightgoldenrod1] task_finished [label="{decorator|task_finished}", fillcolor=lightblue2] } /* initialize */ validate_env -> flow_init flow_init -> step_init step_init -> choose_command choose_command -> validate_dag validate_env_deuce -> flow_init_deuce flow_init_deuce -> step_init_deuce step_init_deuce -> choose_command_deuce /* package */ validate_dag -> init_environment init_environment -> package_init package_init -> add_custom_package add_custom_package -> add_to_package add_to_package -> package package -> command_run package -> stepfunctions_create /* stepfunctions deploy */ stepfunctions_create -> push_to_stepfunctions /* 
local run */ command_run -> new_run_id new_run_id -> runtime_init runtime_init -> local_params local_params -> start_run_heartbeat start_run_heartbeat -> schedule_local_task schedule_local_task -> new_local_task [label="for each task"] schedule_local_task -> runtime_finished runtime_finished -> stop_run_heartbeat [label="flow finished"] /* local task */ new_local_task -> runtime_task_created runtime_task_created -> runtime_step_cli runtime_step_cli -> launch_local launch_local -> validate_env_deuce choose_command_deuce -> task_entry [label="local task"] choose_command_deuce -> batch_step [label="AWS Batch task"] /* batch run */ batch_step -> launch_batch launch_batch -> local_bootstrap_batch local_bootstrap_batch -> validate_env_deuce /* step functions run */ stepfunctions_trigger -> stepfunctions_run stepfunctions_run -> stepfunctions_bootstrap_batch stepfunctions_bootstrap_batch -> stepfunctions_init [label="AWS Step Functions start"] stepfunctions_bootstrap_batch -> stepfunctions_task [label="AWS Step Functions task"] stepfunctions_init -> stepfunctions_params stepfunctions_params -> stepfunctions_task stepfunctions_task -> validate_env_deuce /* task */ task_entry -> register_run register_run -> register_task register_task -> start_task_heartbeat start_task_heartbeat -> task_pre_step task_pre_step -> task_decorate task_decorate -> user_code user_code -> task_post_step [label="Task success"] user_code -> task_exception [label="Task failed"] task_post_step -> persist_artifacts task_exception -> persist_artifacts persist_artifacts -> stop_task_heartbeat stop_task_heartbeat -> register_artifacts register_artifacts -> task_finished } ================================================ FILE: docs/sidecars.md ================================================ # Sidecars overview ## Purpose There are several use cases around logging, monitoring, and possibly other “tier 2” features that would benefit from a nonblocking implementation. 
So anything running within a sidecar should be able to be executed asynchronously from the main process, with no strong consistency requirement between it and the main process. This will help ensure that errors in non-critical flows do not cause the whole workflow to fail and reduce the latency overhead added by the platform itself. ## Design/Architecture Sidecars are run under a separate subprocess (sidecar worker) that engages in one-way communication with the main process (sidecar class) via [pipes](https://www.tutorialspoint.com/inter_process_communication/inter_process_communication_pipes.htm). The sidecar worker consumes messages from the main process via stdin and logs debug and error messages to stderr. Note that since Metaflow blocks the completion of a task until the termination of stdout (to collect the logs), the stdout for sidecars is directed to `/dev/null` instead of inheriting the stdout of the parent process to ensure the process is non-blocking.
#### Interface
Every implementation of sidecar needs to implement the following two methods:
#### `def process_message(msg: Message)`
- The function that handles how each message is processed
#### `def shutdown()`
- Defines the "best effort" shutdown mechanism for the subprocess.
## Specific Implementations
### Heartbeat
We send heartbeats to the metadata service from a sidecar, `heartbeat.py`, to
detect whether the task is alive. Since heartbeats are purely informational,
we didn't want these service calls to increase the latency of the main
process, nor did we want a failed request to fail the whole parent process.
A sidecar that handles communication with the metadata service was a perfect
solution.
================================================
FILE: docs/update_lifecycle_png
================================================
# install graphviz first
dot -Tpng lifecycle.dot -o lifecycle.png
================================================
FILE: metaflow/R.py
================================================
import os
import sys
from importlib import util as imp_util, machinery as imp_machinery
from tempfile import NamedTemporaryFile
from . import parameters
from .util import to_bytes
R_FUNCTIONS = {}
R_PACKAGE_PATHS = None
RDS_FILE_PATH = None
R_CONTAINER_IMAGE = None
METAFLOW_R_VERSION = None
R_VERSION = None
R_VERSION_CODE = None
def call_r(func_name, args):
    """
    Invoke the R function registered under `func_name`.

    The callable is looked up in the module-level `R_FUNCTIONS` mapping and
    called with `args` unpacked as positional arguments. The result of the
    call is discarded.
    """
    r_callable = R_FUNCTIONS[func_name]
    r_callable(*args)
def get_r_func(func_name):
    """
    Return the object stored under `func_name` in the module-level
    `R_FUNCTIONS` mapping (a missing name raises the mapping's lookup error).
    """
    registered = R_FUNCTIONS[func_name]
    return registered
def package_paths():
    """
    Yield `(source_path, packaged_path)` pairs for the R code package.

    Nothing is yielded when `R_PACKAGE_PATHS` is unset. Otherwise every file
    found under `R_PACKAGE_PATHS["package"]` is yielded with its path relative
    to that root re-rooted under `metaflow-r/`; directories whose path contains
    `/.` and files whose name starts with `.` are skipped. The flow file
    (`R_PACKAGE_PATHS["flow"]`) is yielded last under its base name.
    """
    if R_PACKAGE_PATHS is None:
        return

    root = R_PACKAGE_PATHS["package"]
    # Length of the root plus exactly one trailing slash, used to cut the
    # root prefix off each walked path.
    prefixlen = len("%s/" % root.rstrip("/"))
    for dirpath, _subdirs, filenames in os.walk(root):
        if "/." in dirpath:
            continue
        for filename in filenames:
            if filename[0] != ".":
                source = os.path.join(dirpath, filename)
                yield source, os.path.join("metaflow-r", source[prefixlen:])

    flow_file = R_PACKAGE_PATHS["flow"]
    yield flow_file, os.path.basename(flow_file)
def entrypoint():
    """
    Return the shell command string that launches `metaflow-r/run_batch.R`,
    with the module-level `RDS_FILE_PATH` substituted as the `--flowRDS`
    argument.
    """
    command_template = "PYTHONPATH=/root/metaflow R_LIBS_SITE=`Rscript -e 'cat(paste(.libPaths(), collapse=\\\":\\\"))'`:metaflow/ Rscript metaflow-r/run_batch.R --flowRDS=%s"
    return command_template % RDS_FILE_PATH
def use_r():
    """
    Return True when `R_PACKAGE_PATHS` has been set (it is assigned by
    `run`), False while it is still None.
    """
    paths_missing = R_PACKAGE_PATHS is None
    return not paths_missing
def container_image():
    """
    Return the module-level `R_CONTAINER_IMAGE` value (None until `run`
    assigns it from its `r_container_image` argument).
    """
    image = R_CONTAINER_IMAGE
    return image
def metaflow_r_version():
    """
    Return the module-level `METAFLOW_R_VERSION` value (None until `run`
    assigns it from its `metaflow_r_version` argument).
    """
    version = METAFLOW_R_VERSION
    return version
def r_version():
    """
    Return the module-level `R_VERSION` value (None until `run` assigns it
    from its `r_version` argument).
    """
    version = R_VERSION
    return version
def r_version_code():
    """
    Return the module-level `R_VERSION_CODE` value (None until `run` assigns
    it from its `r_version_code` argument).
    """
    version_code = R_VERSION_CODE
    return version_code
def working_dir():
    """
    Return `R_PACKAGE_PATHS["wd"]` when R support is active (see `use_r`),
    otherwise None.
    """
    return R_PACKAGE_PATHS["wd"] if use_r() else None
def load_module_from_path(module_name: str, path: str):
    """
    Execute the Python source file at `path` as a module and register it.

    Parameters
    ----------
    module_name: str
        Name given to the loaded module; it is also the key under which the
        module is stored in `sys.modules`.
    path: str
        Path to the source file to load.

    Returns
    -------
    The loaded module object.
    """
    source_loader = imp_machinery.SourceFileLoader(module_name, path)
    module_spec = imp_util.spec_from_loader(source_loader.name, source_loader)
    loaded = imp_util.module_from_spec(module_spec)
    source_loader.exec_module(loaded)
    # Registering the module is what makes `import <module_name>` work later.
    sys.modules[module_name] = loaded
    return loaded
def run(
    flow_script,
    r_functions,
    rds_file,
    metaflow_args,
    full_cmdline,
    r_paths,
    r_container_image,
    metaflow_r_version,
    r_version,
    r_version_code,
):
    """
    Entry point called from R: load the generated flow script and run the CLI.

    The R-side values are stored in this module's globals, `flow_script` is
    written to a temporary file and loaded as the module `metaflowR`, and its
    `FLOW` object is handed to `cli.main`.

    Parameters
    ----------
    flow_script
        Source text of the generated Python flow; must define `FLOW`.
    r_functions
        Mapping stored as `R_FUNCTIONS` (used by `call_r` / `get_r_func`).
    rds_file
        Stored as `RDS_FILE_PATH` (used by `entrypoint`).
    metaflow_args
        Arguments forwarded to `cli.main`; a non-list value is wrapped in a
        one-element list.
    full_cmdline
        Full command line as a list; element 0 is replaced in place by its
        base name, and the list minus the trailing `metaflow_args` is passed
        to `cli.main` as the entrypoint.
    r_paths
        Stored as `R_PACKAGE_PATHS`; other functions in this module read its
        "package", "flow" and "wd" keys.
    r_container_image, metaflow_r_version, r_version, r_version_code
        Stored in the corresponding module globals.

    On a `MetaflowException` or any other `Exception` raised while running
    the CLI, the error is printed, the temporary file is removed and the
    process is terminated with `os._exit(1)`.
    """
    global R_FUNCTIONS, R_PACKAGE_PATHS, RDS_FILE_PATH, R_CONTAINER_IMAGE, METAFLOW_R_VERSION, R_VERSION, R_VERSION_CODE

    R_FUNCTIONS = r_functions
    R_PACKAGE_PATHS = r_paths
    RDS_FILE_PATH = rds_file
    R_CONTAINER_IMAGE = r_container_image
    METAFLOW_R_VERSION = metaflow_r_version
    R_VERSION = r_version
    R_VERSION_CODE = r_version_code

    # there's some reticulate(?) sillyness which causes metaflow_args
    # not to be a list if it has only one item. Here's a workaround
    if not isinstance(metaflow_args, list):
        metaflow_args = [metaflow_args]
    # remove any reference to local path structure from R
    full_cmdline[0] = os.path.basename(full_cmdline[0])

    # Strip the trailing metaflow arguments to obtain the entrypoint. An
    # explicit end index is used because `full_cmdline[: -len(metaflow_args)]`
    # collapses to `full_cmdline[:0]` (an empty list) when there are no
    # metaflow arguments.
    entrypoint_end = max(0, len(full_cmdline) - len(metaflow_args))
    entrypoint_cmd = full_cmdline[:entrypoint_end]

    with NamedTemporaryFile(prefix="metaflowR.", delete=False) as tmp:
        tmp.write(to_bytes(flow_script))

    try:
        module = load_module_from_path("metaflowR", tmp.name)
        flow = module.FLOW(use_cli=False)
    except BaseException:
        # The file was created with delete=False, so clean it up ourselves
        # before letting the loading error propagate unchanged.
        os.remove(tmp.name)
        raise

    from . import exception

    try:
        with parameters.flow_context(flow.__class__) as _:
            from . import cli

            cli.main(
                flow,
                args=metaflow_args,
                handle_exceptions=False,
                entrypoint=entrypoint_cmd,
            )
    except exception.MetaflowException as e:
        cli.print_metaflow_exception(e)
        # os._exit skips the `finally` clause, so remove the file here.
        os.remove(tmp.name)
        os._exit(1)
    except Exception as e:
        print(e)
        sys.stdout.flush()
        # os._exit skips the `finally` clause, so remove the file here.
        os.remove(tmp.name)
        os._exit(1)
    finally:
        os.remove(tmp.name)
================================================
FILE: metaflow/__init__.py
================================================
"""
Welcome to Metaflow!
Metaflow is a microframework for data science projects.
There are two main use cases for this package:
1) You can define new flows using the `FlowSpec`
class and related decorators.
2) You can access and inspect existing flows.
You can instantiate a `Metaflow` class to
get an entry point to all existing objects.
# How to work with flows
A flow is a directed graph of Python functions called steps.
Metaflow takes care of executing these steps one by one in various
environments, such as on a local laptop or compute environments
(such as AWS Batch for example). It snapshots
data and code related to each run, so you can resume, reproduce,
and inspect results easily at a later point in time.
Here is a high-level overview of objects related to flows:
[ FlowSpec ] (0) Base class for flows.
[ MyFlow ] (1) Subclass from FlowSpec to define a new flow.
define new flows
----------------- (2) Run MyFlow on the command line.
access results
[ Flow ] (3) Access your flow with `Flow('MyFlow')`.
[ Run ] (4) Access a specific run with `Run('MyFlow/RunID')`.
[ Step ] (5) Access a specific step by its name, e.g. `run['end']`.
[ Task ] (6) Access a task related to the step with `step.task`.
[ DataArtifact ] (7) Access data of a task with `task.data`.
# More questions?
If you have any questions, feel free to post a bug report/question on the
Metaflow GitHub page.
"""
import os
import sys
from metaflow.extension_support import (
alias_submodules,
get_modules,
lazy_load_aliases,
load_globals,
load_module,
EXT_PKG,
_ext_debug,
)
# We load the module overrides *first* explicitly. Non overrides can be loaded
# in toplevel as well but these can be loaded first if needed. Note that those
# modules should be careful not to include anything in Metaflow at their top-level
# as it is likely to not work.
# Fully qualified names of extension "module_overrides" modules to load.
_override_modules = []
# (package_name, fully qualified module name) pairs for extension "toplevel"
# modules; these are loaded further down, after Metaflow's own imports.
_tl_modules = []
try:
_modules_to_import = get_modules("toplevel")
for m in _modules_to_import:
# Each extension module may name an overrides module via `module_overrides`.
override_module = m.module.__dict__.get("module_overrides", None)
if override_module is not None:
_override_modules.append(
".".join([EXT_PKG, m.tl_package, "toplevel", override_module])
)
# ... and/or a module to merge into the top-level namespace via `toplevel`.
tl_module = m.module.__dict__.get("toplevel", None)
if tl_module is not None:
_tl_modules.append(
(
m.package_name,
".".join([EXT_PKG, m.tl_package, "toplevel", tl_module]),
)
)
_ext_debug("Got overrides to load: %s" % _override_modules)
_ext_debug("Got top-level imports: %s" % str(_tl_modules))
except Exception as e:
# Best effort: a failure while collecting extensions is only reported through
# the extension debug channel; whatever was collected so far is still used.
_ext_debug("Error in importing toplevel/overrides: %s" % e)
# Load overrides now that we have them (in the proper order)
for m in _override_modules:
extension_module = load_module(m)
if extension_module:
# We load only modules
# `m` is "<EXT_PKG>.<tl_package>.toplevel.<name>", so index 1 is tl_package.
tl_package = m.split(".")[1]
lazy_load_aliases(alias_submodules(extension_module, tl_package, None))
# Utilities
from .multicore_utils import parallel_imap_unordered, parallel_map
from .metaflow_profile import profile
# current runtime singleton
from .metaflow_current import current
# Flow spec
from .flowspec import FlowSpec
from .parameters import Parameter, JSONTypeClass, JSONType
from .user_configs.config_parameters import Config, ConfigValue, config_expr
from .user_decorators.user_step_decorator import (
UserStepDecorator,
StepMutator,
user_step_decorator,
USER_SKIP_STEP,
)
from .user_decorators.user_flow_decorator import FlowMutator
# data layer
# For historical reasons, we make metaflow.plugins.datatools accessible as
# metaflow.datatools. S3 is also a tool that has historically been available at the
# top-level so keep as is.
lazy_load_aliases({"metaflow.datatools": "metaflow.plugins.datatools"})
from .plugins.datatools import S3
# includefile
from .includefile import IncludeFile
# Decorators
from .decorators import step, _import_plugin_decorators
# Parsers (for configs) for now
from .plugins import _import_tl_plugins
_import_tl_plugins(globals())
# this auto-generates decorator functions from Decorator objects
# in the top-level metaflow namespace
_import_plugin_decorators(globals())
# Cards are only imported on Python 3.6 and later. The version is compared as
# a tuple: checking major and minor separately would wrongly skip the import
# on a future major version whose minor number is below 6.
if sys.version_info >= (3, 6):
    from . import cards
# Client
from .client import (
namespace,
get_namespace,
default_namespace,
metadata,
get_metadata,
default_metadata,
inspect_spin,
Metaflow,
Flow,
Run,
Step,
Task,
DataArtifact,
)
# Import data class within tuple_util but not introduce new symbols.
from . import tuple_util
# Runner API
if sys.version_info >= (3, 7):
from .runner.metaflow_runner import Runner
from .runner.nbrun import NBRunner
from .runner.deployer import Deployer
from .runner.deployer import DeployedFlow
from .runner.nbdeploy import NBDeployer
# (package_name, module) pairs for every extension top-level module that was
# successfully loaded below.
__ext_tl_modules__ = []
_ext_debug("Loading top-level modules")
for pkg_name, m in _tl_modules:
extension_module = load_module(m)
if extension_module:
# `m` is "<EXT_PKG>.<tl_package>.toplevel.<name>", so index 1 is tl_package.
tl_package = m.split(".")[1]
# Merge the extension module's globals into this package's namespace.
load_globals(extension_module, globals(), extra_indent=True)
lazy_load_aliases(
alias_submodules(extension_module, tl_package, None, extra_indent=True)
)
__ext_tl_modules__.append((pkg_name, extension_module))
# Erase all temporary names to avoid leaking things
# NOTE(review): `EXT_PKG` below is the variable, not the string "EXT_PKG", so
# the name removed is whatever EXT_PKG holds and `EXT_PKG` itself stays in the
# namespace -- confirm whether that is intended before changing it.
for _n in [
    "_ext_debug",
    "alias_submodules",
    "get_modules",
    "lazy_load_aliases",
    "load_globals",
    "load_module",
    EXT_PKG,
    "_override_modules",
    "_tl_modules",
    "_modules_to_import",
    "m",
    "override_module",
    "tl_module",
    "extension_module",
    "tl_package",
    # Loop variable of the top-level extension loading loop; it is bound
    # whenever at least one top-level extension module exists.
    "pkg_name",
    "version_info",
]:
    # Names that were never bound (e.g. no extensions installed) are skipped.
    globals().pop(_n, None)
del globals()["_n"]
from .version import metaflow_version as _mf_version
__version__ = _mf_version
================================================
FILE: metaflow/_vendor/PyYAML.LICENSE
================================================
Copyright (c) 2017-2020 Ingy döt Net
Copyright (c) 2006-2016 Kirill Simonov
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: metaflow/_vendor/__init__.py
================================================
"""
metaflow._vendor is for vendoring dependencies of metaflow. Files
inside of metaflow._vendor should be considered immutable and
should only be updated to versions from upstream.
This folder is generated by `python vendor.py`
If you would like to debundle the vendored dependencies, please
reach out to the maintainers at chat.metaflow.org
"""
================================================
FILE: metaflow/_vendor/click/__init__.py
================================================
"""
Click is a simple Python module inspired by the stdlib optparse to make
writing command line scripts fun. Unlike other modules, it's based
around a simple API that does not come with too much magic and is
composable.
"""
from .core import Argument
from .core import BaseCommand
from .core import Command
from .core import CommandCollection
from .core import Context
from .core import Group
from .core import MultiCommand
from .core import Option
from .core import Parameter
from .decorators import argument
from .decorators import command
from .decorators import confirmation_option
from .decorators import group
from .decorators import help_option
from .decorators import make_pass_decorator
from .decorators import option
from .decorators import pass_context
from .decorators import pass_obj
from .decorators import password_option
from .decorators import version_option
from .exceptions import Abort
from .exceptions import BadArgumentUsage
from .exceptions import BadOptionUsage
from .exceptions import BadParameter
from .exceptions import ClickException
from .exceptions import FileError
from .exceptions import MissingParameter
from .exceptions import NoSuchOption
from .exceptions import UsageError
from .formatting import HelpFormatter
from .formatting import wrap_text
from .globals import get_current_context
from .parser import OptionParser
from .termui import clear
from .termui import confirm
from .termui import echo_via_pager
from .termui import edit
from .termui import get_terminal_size
from .termui import getchar
from .termui import launch
from .termui import pause
from .termui import progressbar
from .termui import prompt
from .termui import secho
from .termui import style
from .termui import unstyle
from .types import BOOL
from .types import Choice
from .types import DateTime
from .types import File
from .types import FLOAT
from .types import FloatRange
from .types import INT
from .types import IntRange
from .types import ParamType
from .types import Path
from .types import STRING
from .types import Tuple
from .types import UNPROCESSED
from .types import UUID
from .utils import echo
from .utils import format_filename
from .utils import get_app_dir
from .utils import get_binary_stream
from .utils import get_os_args
from .utils import get_text_stream
from .utils import open_file
# Controls if click should emit the warning about the use of unicode
# literals.
disable_unicode_literals_warning = False
__version__ = "7.1.2"
================================================
FILE: metaflow/_vendor/click/_bashcomplete.py
================================================
import copy
import os
import re
from .core import Argument
from .core import MultiCommand
from .core import Option
from .parser import split_arg_string
from .types import Choice
from .utils import echo
try:
from collections import abc
except ImportError:
import collections as abc
WORDBREAK = "="
# Note, only BASH version 4.4 and later have the nosort option.
COMPLETION_SCRIPT_BASH = """
%(complete_func)s() {
local IFS=$'\n'
COMPREPLY=( $( env COMP_WORDS="${COMP_WORDS[*]}" \\
COMP_CWORD=$COMP_CWORD \\
%(autocomplete_var)s=complete $1 ) )
return 0
}
%(complete_func)setup() {
local COMPLETION_OPTIONS=""
local BASH_VERSION_ARR=(${BASH_VERSION//./ })
# Only BASH version 4.4 and later have the nosort option.
if [ ${BASH_VERSION_ARR[0]} -gt 4 ] || ([ ${BASH_VERSION_ARR[0]} -eq 4 ] \
&& [ ${BASH_VERSION_ARR[1]} -ge 4 ]); then
COMPLETION_OPTIONS="-o nosort"
fi
complete $COMPLETION_OPTIONS -F %(complete_func)s %(script_names)s
}
%(complete_func)setup
"""
COMPLETION_SCRIPT_ZSH = """
#compdef %(script_names)s
%(complete_func)s() {
local -a completions
local -a completions_with_descriptions
local -a response
(( ! $+commands[%(script_names)s] )) && return 1
response=("${(@f)$( env COMP_WORDS=\"${words[*]}\" \\
COMP_CWORD=$((CURRENT-1)) \\
%(autocomplete_var)s=\"complete_zsh\" \\
%(script_names)s )}")
for key descr in ${(kv)response}; do
if [[ "$descr" == "_" ]]; then
completions+=("$key")
else
completions_with_descriptions+=("$key":"$descr")
fi
done
if [ -n "$completions_with_descriptions" ]; then
_describe -V unsorted completions_with_descriptions -U
fi
if [ -n "$completions" ]; then
compadd -U -V unsorted -a completions
fi
compstate[insert]="automenu"
}
compdef %(complete_func)s %(script_names)s
"""
COMPLETION_SCRIPT_FISH = (
"complete --no-files --command %(script_names)s --arguments"
' "(env %(autocomplete_var)s=complete_fish'
" COMP_WORDS=(commandline -cp) COMP_CWORD=(commandline -t)"
' %(script_names)s)"'
)
# Shell name -> completion script template. get_completion_script falls back
# to the bash template for any shell not listed here.
_completion_scripts = {
"bash": COMPLETION_SCRIPT_BASH,
"zsh": COMPLETION_SCRIPT_ZSH,
"fish": COMPLETION_SCRIPT_FISH,
}
# Matches every character other than ASCII letters, digits and underscore;
# used to strip such characters from the generated completion function name.
_invalid_ident_char_re = re.compile(r"[^a-zA-Z0-9_]")
def get_completion_script(prog_name, complete_var, shell):
    """Render the completion script for ``shell``.

    :param prog_name: the program name; also used (with ``-`` replaced by
        ``_`` and other non-identifier characters removed) to build the
        completion function name
    :param complete_var: name of the environment variable substituted for
        ``autocomplete_var`` in the template
    :param shell: key into the known templates; unknown shells get the bash
        template
    :return: the rendered template, stripped, with ``;`` appended
    """
    sanitized = _invalid_ident_char_re.sub("", prog_name.replace("-", "_"))
    template = _completion_scripts.get(shell, COMPLETION_SCRIPT_BASH)
    substitutions = {
        "complete_func": "_{}_completion".format(sanitized),
        "script_names": prog_name,
        "autocomplete_var": complete_var,
    }
    rendered = template % substitutions
    return rendered.strip() + ";"
def resolve_ctx(cli, prog_name, args):
"""Parse into a hierarchy of contexts. Contexts are connected
through the parent variable.
Every context is created with ``resilient_parsing=True``. Resolution stops
early, returning the context reached so far, as soon as a sub-command name
cannot be resolved.
:param cli: command definition
:param prog_name: the program that is running
:param args: full list of args
:return: the final context/command parsed
"""
ctx = cli.make_context(prog_name, args, resilient_parsing=True)
# Arguments still to be resolved: the context's protected_args followed by
# its args.
args = ctx.protected_args + ctx.args
while args:
if isinstance(ctx.command, MultiCommand):
if not ctx.command.chain:
# Non-chained group: resolve one sub-command and descend into its context.
cmd_name, cmd, args = ctx.command.resolve_command(ctx, args)
if cmd is None:
return ctx
ctx = cmd.make_context(
cmd_name, args, parent=ctx, resilient_parsing=True
)
args = ctx.protected_args + ctx.args
else:
# Walk chained subcommand contexts saving the last one.
# NOTE(review): the nesting of the statements after this inner loop cannot be
# read off this flattened text -- confirm against upstream click 7.1.2.
while args:
cmd_name, cmd, args = ctx.command.resolve_command(ctx, args)
if cmd is None:
return ctx
sub_ctx = cmd.make_context(
cmd_name,
args,
parent=ctx,
allow_extra_args=True,
allow_interspersed_args=False,
resilient_parsing=True,
)
args = sub_ctx.args
ctx = sub_ctx
args = sub_ctx.protected_args + sub_ctx.args
else:
# The current command is not a MultiCommand: stop resolving.
break
return ctx
def start_of_option(param_str):
    """
    :param param_str: param_str to check
    :return: whether or not this is the start of an option declaration
        (i.e. starts "-" or "--"); a falsy ``param_str`` (empty string,
        None) is returned unchanged
    """
    if not param_str:
        return param_str
    return param_str[:1] == "-"
def is_incomplete_option(all_args, cmd_param):
    """
    :param all_args: the full original list of args supplied
    :param cmd_param: the current command parameter
    :return: whether or not the last option declaration (i.e. starts
        "-" or "--") is incomplete and corresponds to this cmd_param. In
        other words whether this cmd_param option can still accept
        values
    """
    if not isinstance(cmd_param, Option) or cmd_param.is_flag:
        return False

    # Look at the arguments from the end backwards, ignoring word-break
    # tokens, and inspect at most `nargs` of them.
    newest_first = [token for token in all_args if token != WORDBREAK][::-1]
    last_option = None
    for position, token in enumerate(newest_first, start=1):
        if position > cmd_param.nargs:
            break
        if start_of_option(token):
            last_option = token

    return bool(last_option and last_option in cmd_param.opts)
def is_incomplete_argument(current_params, cmd_param):
    """
    :param current_params: the current params and values for this
        argument as already entered
    :param cmd_param: the current command parameter
    :return: whether or not the last argument is incomplete and
        corresponds to this cmd_param. In other words whether or not
        this cmd_param argument can still accept values
    """
    if not isinstance(cmd_param, Argument):
        return False

    entered = current_params[cmd_param.name]
    # No value yet, or an argument declared with nargs == -1.
    if entered is None or cmd_param.nargs == -1:
        return True

    # A multi-value argument that has not received all of its values.
    return (
        isinstance(entered, abc.Iterable)
        and cmd_param.nargs > 1
        and len(entered) < cmd_param.nargs
    )
def get_user_autocompletions(ctx, args, incomplete, cmd_param):
    """
    :param ctx: context associated with the parsed command
    :param args: full list of args
    :param incomplete: the incomplete text to autocomplete
    :param cmd_param: command definition
    :return: all the possible user-specified completions for the param,
        as a list of ``(value, description)`` tuples
    """
    if isinstance(cmd_param.type, Choice):
        # Choices don't support descriptions.
        return [
            (choice, None)
            for choice in cmd_param.type.choices
            if str(choice).startswith(incomplete)
        ]

    if cmd_param.autocompletion is None:
        return []

    suggestions = cmd_param.autocompletion(
        ctx=ctx, args=args, incomplete=incomplete
    )
    # Bare values are paired with a None description.
    return [
        item if isinstance(item, tuple) else (item, None) for item in suggestions
    ]
def get_visible_commands_starting_with(ctx, starts_with):
    """
    :param ctx: context associated with the parsed command
    :param starts_with: string that visible commands must start with.
    :return: generator over all visible (not hidden) commands whose name
        starts with starts_with.
    """
    group = ctx.command
    matching_names = (
        name for name in group.list_commands(ctx) if name.startswith(starts_with)
    )
    for name in matching_names:
        candidate = group.get_command(ctx, name)
        if candidate.hidden:
            continue
        yield candidate
def add_subcommand_completions(ctx, incomplete, completions_out):
    """Append ``(name, short_help)`` pairs for matching sub-commands.

    :param ctx: context associated with the parsed command
    :param incomplete: the incomplete text to autocomplete
    :param completions_out: list extended in place with the completions
    """

    def describe(commands):
        return [(cmd.name, cmd.get_short_help_str()) for cmd in commands]

    # Sub-commands of the current command.
    if isinstance(ctx.command, MultiCommand):
        completions_out.extend(
            describe(get_visible_commands_starting_with(ctx, incomplete))
        )

    # Walk up the context list and add any other completion
    # possibilities from chained commands
    ancestor = ctx.parent
    while ancestor is not None:
        if isinstance(ancestor.command, MultiCommand) and ancestor.command.chain:
            remaining = [
                cmd
                for cmd in get_visible_commands_starting_with(ancestor, incomplete)
                if cmd.name not in ancestor.protected_args
            ]
            completions_out.extend(describe(remaining))
        ancestor = ancestor.parent
def get_choices(cli, prog_name, args, incomplete):
    """Compute every completion candidate for the word being typed.

    :param cli: command definition
    :param prog_name: the program that is running
    :param args: full list of args
    :param incomplete: the incomplete text to autocomplete
    :return: all the possible completions for the incomplete
    """
    # Snapshot the args before they are handed to resolve_ctx.
    seen_args = copy.deepcopy(args)

    ctx = resolve_ctx(cli, prog_name, args)
    if ctx is None:
        return []

    double_dash_seen = "--" in seen_args

    # Newer bash versions split long options at '='; completing is simpler
    # when the option name and its partial value are handled separately.
    if start_of_option(incomplete) and WORDBREAK in incomplete:
        option_name, _, partial_value = incomplete.partition(WORDBREAK)
        seen_args.append(option_name)
        incomplete = partial_value
    elif incomplete == WORDBREAK:
        incomplete = ""

    if not double_dash_seen and start_of_option(incomplete):
        # The user is typing an option name: offer the matching flags.
        option_completions = []
        for param in ctx.command.params:
            if not isinstance(param, Option) or param.hidden:
                continue
            available = [
                opt
                for opt in param.opts + param.secondary_opts
                if opt not in seen_args or param.multiple
            ]
            option_completions.extend(
                [(opt, param.help) for opt in available if opt.startswith(incomplete)]
            )
        return option_completions

    # An option that still expects a value takes precedence ...
    for param in ctx.command.params:
        if is_incomplete_option(seen_args, param):
            return get_user_autocompletions(ctx, seen_args, incomplete, param)

    # ... followed by an argument that has not been fully supplied.
    for param in ctx.command.params:
        if is_incomplete_argument(ctx.params, param):
            return get_user_autocompletions(ctx, seen_args, incomplete, param)

    subcommand_completions = []
    add_subcommand_completions(ctx, incomplete, subcommand_completions)
    # Sort before returning so that proper ordering can be enforced in custom types.
    return sorted(subcommand_completions)
def do_complete(cli, prog_name, include_descriptions):
    """Print completions for bash/zsh based on COMP_WORDS and COMP_CWORD.

    :param cli: command definition
    :param prog_name: the program that is running
    :param include_descriptions: when true, a description line is echoed
        after every completion value
    :return: always ``True``
    """
    words = split_arg_string(os.environ["COMP_WORDS"])
    current_index = int(os.environ["COMP_CWORD"])
    preceding_args = words[1:current_index]

    try:
        incomplete = words[current_index]
    except IndexError:
        # The cursor sits past the last word: nothing typed yet.
        incomplete = ""

    for choice in get_choices(cli, prog_name, preceding_args, incomplete):
        echo(choice[0])
        if not include_descriptions:
            continue
        # ZSH has trouble dealing with empty array parameters when returned
        # from commands, so '_' stands in for a missing description.
        description = choice[1] if choice[1] else "_"
        echo(description)

    return True
def do_complete_fish(cli, prog_name):
    """Print completions for fish.

    Here COMP_CWORD holds the incomplete text itself rather than an index,
    and every word after the program name is passed on as an argument.

    :param cli: command definition
    :param prog_name: the program that is running
    :return: always ``True``
    """
    words = split_arg_string(os.environ["COMP_WORDS"])
    incomplete = os.environ["COMP_CWORD"]
    arguments = words[1:]

    for choice in get_choices(cli, prog_name, arguments, incomplete):
        if not choice[1]:
            echo(choice[0])
            continue
        # A tab separates the value from its description.
        echo("{arg}\t{desc}".format(arg=choice[0], desc=choice[1]))

    return True
def bashcomplete(cli, prog_name, complete_var, complete_instr):
    """Dispatch a completion instruction of the form ``command[_shell]``.

    :param cli: command definition
    :param prog_name: the program that is running
    :param complete_var: name of the environment variable that triggers
        completion (embedded in the generated script)
    :param complete_instr: instruction string such as ``source``,
        ``source_zsh`` or ``complete_fish``; the shell defaults to bash
    :return: ``True`` when the instruction was handled, ``False`` otherwise
    """
    command, separator, shell = complete_instr.partition("_")
    if not separator:
        shell = "bash"

    if command == "source":
        echo(get_completion_script(prog_name, complete_var, shell))
        return True

    if command != "complete":
        return False

    if shell == "fish":
        return do_complete_fish(cli, prog_name)
    if shell in {"bash", "zsh"}:
        # Only zsh consumes the description lines.
        return do_complete(cli, prog_name, shell == "zsh")
    return False
================================================
FILE: metaflow/_vendor/click/_compat.py
================================================
# flake8: noqa
import codecs
import io
import os
import re
import sys
from weakref import WeakKeyDictionary
PY2 = sys.version_info[0] == 2
CYGWIN = sys.platform.startswith("cygwin")
MSYS2 = sys.platform.startswith("win") and ("GCC" in sys.version)
# Determine local App Engine environment, per Google's own suggestion
APP_ENGINE = "APPENGINE_RUNTIME" in os.environ and "Development/" in os.environ.get(
"SERVER_SOFTWARE", ""
)
WIN = sys.platform.startswith("win") and not APP_ENGINE and not MSYS2
DEFAULT_COLUMNS = 80
_ansi_re = re.compile(r"\033\[[;?0-9]*[a-zA-Z]")
def get_filesystem_encoding():
    """Return the filesystem encoding, or the interpreter default encoding
    when the filesystem encoding is unset."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding:
        return fs_encoding
    return sys.getdefaultencoding()
def _make_text_stream(
    stream, encoding, errors, force_readable=False, force_writable=False
):
    """Wrap ``stream`` in a line-buffered ``_NonClosingTextIOWrapper``.

    A missing ``encoding`` is resolved through ``get_best_encoding`` and a
    missing ``errors`` policy becomes ``"replace"``.
    """
    chosen_encoding = get_best_encoding(stream) if encoding is None else encoding
    chosen_errors = "replace" if errors is None else errors
    return _NonClosingTextIOWrapper(
        stream,
        chosen_encoding,
        chosen_errors,
        line_buffering=True,
        force_readable=force_readable,
        force_writable=force_writable,
    )
def is_ascii_encoding(encoding):
    """Tell whether ``encoding`` names (an alias of) the ASCII codec.

    Unknown encoding names yield ``False``.
    """
    try:
        codec = codecs.lookup(encoding)
    except LookupError:
        return False
    return codec.name == "ascii"
def get_best_encoding(stream):
    """Pick the encoding to use for ``stream``.

    Uses the stream's own ``encoding`` attribute, falling back to the
    interpreter default; an ASCII result is upgraded to ``"utf-8"``.
    """
    candidate = getattr(stream, "encoding", None) or sys.getdefaultencoding()
    return "utf-8" if is_ascii_encoding(candidate) else candidate
class _NonClosingTextIOWrapper(io.TextIOWrapper):
    """Text wrapper that detaches from, rather than closes, the wrapped
    stream when it is garbage collected."""

    def __init__(
        self,
        stream,
        encoding,
        errors,
        force_readable=False,
        force_writable=False,
        **extra
    ):
        fixed = _FixupStream(stream, force_readable, force_writable)
        self._stream = fixed
        io.TextIOWrapper.__init__(self, fixed, encoding, errors, **extra)

    # The io module is a place where the Python 3 text behavior
    # was forced upon Python 2, so we need to unbreak
    # it to look like Python 2.
    if PY2:

        def write(self, x):
            if not (isinstance(x, str) or is_bytes(x)):
                return io.TextIOWrapper.write(self, x)
            # Byte-like payloads bypass the text layer; flush pending text
            # first on a best-effort basis.
            try:
                self.flush()
            except Exception:
                pass
            return self.buffer.write(str(x))

        def writelines(self, lines):
            for chunk in lines:
                self.write(chunk)

    def __del__(self):
        # Detach so the underlying stream stays open; failures are ignored.
        try:
            self.detach()
        except Exception:
            pass

    def isatty(self):
        # https://bitbucket.org/pypy/pypy/issue/1803
        return self._stream.isatty()
class _FixupStream(object):
"""The new io interface needs more from streams than streams
traditionally implement. As such, this fix-up code is necessary in
some circumstances.
The forcing of readable and writable flags are there because some tools
put badly patched objects on sys (one such offender are certain version
of jupyter notebook).
"""
def __init__(self, stream, force_readable=False, force_writable=False):
self._stream = stream
self._force_readable = force_readable
self._force_writable = force_writable
def __getattr__(self, name):
return getattr(self._stream, name)
def read1(self, size):
f = getattr(self._stream, "read1", None)
if f is not None:
return f(size)
# We only dispatch to readline instead of read in Python 2 as we
# do not want cause problems with the different implementation
# of line buffering.
if PY2:
return self._stream.readline(size)
return self._stream.read(size)
def readable(self):
if self._force_readable:
return True
x = getattr(self._stream, "readable", None)
if x is not None:
return x()
try:
self._stream.read(0)
except Exception:
return False
return True
def writable(self):
if self._force_writable:
return True
x = getattr(self._stream, "writable", None)
if x is not None:
return x()
try:
self._stream.write("")
except Exception:
try:
self._stream.write(b"")
except Exception:
return False
return True
def seekable(self):
x = getattr(self._stream, "seekable", None)
if x is not None:
return x()
try:
self._stream.seek(self._stream.tell())
except Exception:
return False
return True
# Python 2 branch of the compatibility layer: version-neutral aliases for
# the Python 2 built-ins, followed by the Python 2 implementations of the
# standard-stream helpers. The matching ``else`` branch holds the Python 3
# versions of the same names.
if PY2:
text_type = unicode
raw_input = raw_input
string_types = (str, unicode)
int_types = (int, long)
iteritems = lambda x: x.iteritems()
range_type = xrange
# Byte-like check for Python 2: ``buffer`` and ``bytearray`` instances.
def is_bytes(x):
return isinstance(x, (buffer, bytearray))
# Pattern used by the Python 2 ``isidentifier`` helper defined below.
_identifier_re = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")
# For Windows, we need to force stdout/stdin/stderr to binary if it's
# fetched for that. This obviously is not the most correct way to do
# it as it changes global state. Unfortunately, there does not seem to
# be a clear better way to do it as just reopening the file in binary
# mode does not change anything.
#
# An option would be to do what Python 3 does and to open the file as
# binary only, patch it back to the system, and then use a wrapper
# stream that converts newlines. It's not quite clear what's the
# correct option here.
#
# This code also lives in _winconsole for the fallback to the console
# emulation stream.
#
# There are also Windows environments where the `msvcrt` module is not
# available (which is why we use try-catch instead of the WIN variable
# here), such as the Google App Engine development server on Windows. In
# those cases there is just nothing we can do.
# Default implementation: return the file object unchanged.
def set_binary_mode(f):
return f
# When ``msvcrt`` can be imported, redefine set_binary_mode to switch the
# file descriptor to O_BINARY; objects without a usable fileno() are
# returned untouched.
try:
import msvcrt
except ImportError:
pass
else:
def set_binary_mode(f):
try:
fileno = f.fileno()
except Exception:
pass
else:
msvcrt.setmode(fileno, os.O_BINARY)
return f
# When ``fcntl`` can be imported, redefine set_binary_mode to clear the
# O_NONBLOCK flag on the file descriptor; objects without a usable
# fileno() are returned untouched.
try:
import fcntl
except ImportError:
pass
else:
def set_binary_mode(f):
try:
fileno = f.fileno()
except Exception:
pass
else:
flags = fcntl.fcntl(fileno, fcntl.F_GETFL)
fcntl.fcntl(fileno, fcntl.F_SETFL, flags & ~os.O_NONBLOCK)
return f
# True when ``x`` matches ``_identifier_re``.
def isidentifier(x):
return _identifier_re.search(x) is not None
# Binary standard streams: the sys.std* objects passed through
# set_binary_mode. stdout/stderr first call _wrap_std_stream with the
# stream name.
def get_binary_stdin():
return set_binary_mode(sys.stdin)
def get_binary_stdout():
_wrap_std_stream("stdout")
return set_binary_mode(sys.stdout)
def get_binary_stderr():
_wrap_std_stream("stderr")
return set_binary_mode(sys.stderr)
# Text standard streams: use the stream returned by
# _get_windows_console_stream when it is not None, otherwise wrap the
# sys.std* object with _make_text_stream (forced readable for stdin,
# forced writable for stdout/stderr).
def get_text_stdin(encoding=None, errors=None):
rv = _get_windows_console_stream(sys.stdin, encoding, errors)
if rv is not None:
return rv
return _make_text_stream(sys.stdin, encoding, errors, force_readable=True)
def get_text_stdout(encoding=None, errors=None):
_wrap_std_stream("stdout")
rv = _get_windows_console_stream(sys.stdout, encoding, errors)
if rv is not None:
return rv
return _make_text_stream(sys.stdout, encoding, errors, force_writable=True)
def get_text_stderr(encoding=None, errors=None):
_wrap_std_stream("stderr")
rv = _get_windows_console_stream(sys.stderr, encoding, errors)
if rv is not None:
return rv
return _make_text_stream(sys.stderr, encoding, errors, force_writable=True)
# Decode a bytes filename with the filesystem encoding (undecodable bytes
# are replaced); any other value is returned unchanged.
def filename_to_ui(value):
if isinstance(value, bytes):
value = value.decode(get_filesystem_encoding(), "replace")
return value
# Python 3 branch: the same alias names as the Python 2 branch, bound to
# their Python 3 equivalents.
else:
import io
text_type = str
raw_input = input
string_types = (str,)
int_types = (int,)
range_type = range
# Delegates to the built-in str.isidentifier().
isidentifier = lambda x: x.isidentifier()
# Wrapped in iter() so callers receive an iterator, as with the Python 2
# ``iteritems`` alias.
iteritems = lambda x: iter(x.items())
def is_bytes(x):
    """Return True when ``x`` is a bytes, memoryview or bytearray instance."""
    byte_like_types = (bytes, memoryview, bytearray)
    return isinstance(x, byte_like_types)
def _is_binary_reader(stream, default=False):
try:
return isinstance(stream.read(0), bytes)
except Exception:
return default
# This happens in some cases where the stream was already
# closed. In this case, we assume the default.
def _is_binary_writer(stream, default=False):
try:
stream.write(b"")
except Exception:
try:
stream.write("")
return False
except Exception:
pass
return default
return True
def _find_binary_reader(stream):
    """Return a binary reader for ``stream``, or ``None`` if none is found.

    The stream itself is returned when it already reads bytes; otherwise
    its ``buffer`` attribute is tried.
    """
    # The stream may already be binary: the official docs recommend
    # detaching the standard streams to obtain binary ones, and some code
    # does exactly that.
    if _is_binary_reader(stream, False):
        return stream

    underlying = getattr(stream, "buffer", None)
    if underlying is None:
        return None
    # A closed buffer cannot be probed; assume it is binary.
    if _is_binary_reader(underlying, True):
        return underlying
    return None
def _find_binary_writer(stream):
    """Return a binary writer for ``stream``, or ``None`` if none is found.

    The stream itself is returned when it already accepts bytes; otherwise
    its ``buffer`` attribute is tried.
    """
    # The stream may already be binary: the official docs recommend
    # detaching the standard streams to obtain binary ones, and some code
    # does exactly that.
    if _is_binary_writer(stream, False):
        return stream

    underlying = getattr(stream, "buffer", None)
    if underlying is None:
        return None
    # A closed buffer cannot be probed; assume it is binary.
    if _is_binary_writer(underlying, True):
        return underlying
    return None
def _stream_is_misconfigured(stream):
    """A stream is misconfigured if its encoding is ASCII."""
    # A stream without an encoding is treated as ASCII. This appears to
    # happen in certain unittest environments; it is not quite clear what
    # the correct behavior is, but this at least forces Click to recover.
    declared = getattr(stream, "encoding", None)
    return is_ascii_encoding(declared or "ascii")
def _is_compat_stream_attr(stream, attr, value):
"""A stream attribute is compatible if it is equal to the
desired value or the desired value is unset and the attribute
has a value.
"""
stream_value = getattr(stream, attr, None)
return stream_value == value or (value is None and stream_value is not None)
def _is_compatible_text_stream(stream, encoding, errors):
    """Check that both the ``encoding`` and ``errors`` attributes of a
    stream are compatible with the desired values.
    """
    if not _is_compat_stream_attr(stream, "encoding", encoding):
        return False
    return _is_compat_stream_attr(stream, "errors", errors)
def _force_correct_text_stream(
    text_stream,
    encoding,
    errors,
    is_binary,
    find_binary,
    force_readable=False,
    force_writable=False,
):
    """Return a text stream over ``text_stream`` with the wanted settings.

    :param text_stream: stream to inspect (text or already binary)
    :param encoding: desired encoding, or ``None`` for "whatever works"
    :param errors: desired error policy; ``None`` becomes ``"replace"``
        when a new wrapper has to be built
    :param is_binary: callable ``(stream, default)`` telling whether a
        stream is binary
    :param find_binary: callable returning the binary stream underneath a
        text stream, or ``None``
    :return: ``text_stream`` itself when it is already suitable or cannot
        be unwrapped, otherwise a new wrapper from ``_make_text_stream``
    """
    if is_binary(text_stream, False):
        raw_stream = text_stream
    else:
        # A compatible stream is returned as-is, unless no encoding was
        # requested and the stream would default to a misconfigured ASCII
        # encoding.
        if _is_compatible_text_stream(text_stream, encoding, errors) and (
            encoding is not None or not _stream_is_misconfigured(text_stream)
        ):
            return text_stream

        raw_stream = find_binary(text_stream)
        # Without an underlying binary stream, silently keep the original
        # and accept mojibake instead of exceptions.
        if raw_stream is None:
            return text_stream

    # Default errors to replace instead of strict in order to get
    # something that works.
    effective_errors = "replace" if errors is None else errors

    return _make_text_stream(
        raw_stream,
        encoding,
        effective_errors,
        force_readable=force_readable,
        force_writable=force_writable,
    )
def _force_correct_text_reader(text_reader, encoding, errors, force_readable=False):
    """Reader flavour of ``_force_correct_text_stream``: uses the binary
    reader probes and forwards ``force_readable``."""
    return _force_correct_text_stream(
        text_reader,
        encoding,
        errors,
        is_binary=_is_binary_reader,
        find_binary=_find_binary_reader,
        force_readable=force_readable,
    )
def _force_correct_text_writer(text_writer, encoding, errors, force_writable=False):
    """Writer flavour of ``_force_correct_text_stream``: uses the binary
    writer probes and forwards ``force_writable``."""
    return _force_correct_text_stream(
        text_writer,
        encoding,
        errors,
        is_binary=_is_binary_writer,
        find_binary=_find_binary_writer,
        force_writable=force_writable,
    )
def get_binary_stdin():
    """Return the binary stream behind ``sys.stdin``.

    :raises RuntimeError: when no binary stream can be determined
    """
    binary_in = _find_binary_reader(sys.stdin)
    if binary_in is not None:
        return binary_in
    raise RuntimeError("Was not able to determine binary stream for sys.stdin.")
def get_binary_stdout():
    """Return the binary stream behind ``sys.stdout``.

    :raises RuntimeError: when no binary stream can be determined
    """
    binary_out = _find_binary_writer(sys.stdout)
    if binary_out is not None:
        return binary_out
    raise RuntimeError("Was not able to determine binary stream for sys.stdout.")
def get_binary_stderr():
    """Return the binary stream behind ``sys.stderr``.

    :raises RuntimeError: when no binary stream can be determined
    """
    binary_err = _find_binary_writer(sys.stderr)
    if binary_err is not None:
        return binary_err
    raise RuntimeError("Was not able to determine binary stream for sys.stderr.")
def get_text_stdin(encoding=None, errors=None):
    """Return a text stream for ``sys.stdin``.

    The stream from ``_get_windows_console_stream`` is used when it is not
    ``None``; otherwise ``sys.stdin`` is corrected as a (forced readable)
    text reader.
    """
    console_stream = _get_windows_console_stream(sys.stdin, encoding, errors)
    if console_stream is None:
        return _force_correct_text_reader(
            sys.stdin, encoding, errors, force_readable=True
        )
    return console_stream
def get_text_stdout(encoding=None, errors=None):
    """Return a text stream for ``sys.stdout``.

    The stream from ``_get_windows_console_stream`` is used when it is not
    ``None``; otherwise ``sys.stdout`` is corrected as a (forced writable)
    text writer.
    """
    console_stream = _get_windows_console_stream(sys.stdout, encoding, errors)
    if console_stream is None:
        return _force_correct_text_writer(
            sys.stdout, encoding, errors, force_writable=True
        )
    return console_stream
def get_text_stderr(encoding=None, errors=None):
    """Return a text stream for ``sys.stderr``.

    The stream from ``_get_windows_console_stream`` is used when it is not
    ``None``; otherwise ``sys.stderr`` is corrected as a (forced writable)
    text writer.
    """
    console_stream = _get_windows_console_stream(sys.stderr, encoding, errors)
    if console_stream is None:
        return _force_correct_text_writer(
            sys.stderr, encoding, errors, force_writable=True
        )
    return console_stream
def filename_to_ui(value):
    """Turn a filename into text that is safe to display.

    Bytes are decoded with the filesystem encoding. Text is round-tripped
    through UTF-8 so that surrogate escapes turn into replacement
    characters.
    """
    if isinstance(value, bytes):
        return value.decode(get_filesystem_encoding(), "replace")
    utf8_bytes = value.encode("utf-8", "surrogateescape")
    return utf8_bytes.decode("utf-8", "replace")
def get_streerror(e, default=None):
    """Return a human-readable message for exception ``e``.

    :param e: the exception, typically an ``OSError``/``IOError``
    :param default: message to use when ``e`` carries no usable
        ``strerror``
    :return: ``e.strerror`` when it is set and non-empty, otherwise
        ``default`` when given, otherwise ``str(e)``; byte strings are
        decoded as UTF-8 with replacement
    """
    # An OSError built from a single message (no errno) has the attribute
    # but its value is None, so hasattr() alone is not enough: an unset or
    # empty strerror must fall through to the default / str(e) paths.
    msg = getattr(e, "strerror", None)
    if not msg:
        msg = default if default is not None else str(e)
    if isinstance(msg, bytes):
        msg = msg.decode("utf-8", "replace")
    return msg
def _wrap_io_open(file, mode, encoding, errors):
    """Open ``file`` with :func:`io.open`, smoothing over Python 2.

    On Python 2, :func:`io.open` returns a text file wrapper that requires
    passing ``unicode`` to ``write``, so text files are opened in binary
    mode and wrapped in a stream that can write both ``str`` and
    ``unicode``.

    ``encoding`` and ``errors`` are not passed along in binary mode.
    """
    binary = "b" in mode
    text_options = {} if binary else {"encoding": encoding, "errors": errors}

    if binary or not PY2:
        return io.open(file, mode, **text_options)

    # Python 2 text mode: open the raw bytes and add the text layer here.
    raw = io.open(file, "{}b".format(mode.replace("t", "")))
    return _make_text_stream(raw, **text_options)
def open_stream(filename, mode="r", encoding=None, errors="strict", atomic=False):
    """Open ``filename`` (or a standard stream for ``"-"``).

    :param filename: path to open, or ``"-"`` for stdin/stdout
    :param mode: file mode; a ``b`` selects binary streams
    :param encoding: text encoding (ignored in binary mode)
    :param errors: text error policy (ignored in binary mode)
    :param atomic: write to a temporary file next to the target and move
        it into place when the returned file is closed
    :return: ``(stream, should_close)``; ``should_close`` is ``False`` for
        the standard streams and ``True`` for real files
    :raises ValueError: for atomic opens whose mode is not a plain write
    """
    binary = "b" in mode

    # Standard streams first. These are simple because they don't need
    # special handling for the atomic flag. It's entirely ignored.
    if filename == "-":
        wants_output = any(flag in mode for flag in ["w", "a", "x"])
        if wants_output and binary:
            std_stream = get_binary_stdout()
        elif wants_output:
            std_stream = get_text_stdout(encoding=encoding, errors=errors)
        elif binary:
            std_stream = get_binary_stdin()
        else:
            std_stream = get_text_stdin(encoding=encoding, errors=errors)
        return std_stream, False

    # Non-atomic writes directly go out through the regular open functions.
    if not atomic:
        return _wrap_io_open(filename, mode, encoding, errors), True

    # Some usability stuff for atomic writes
    if "a" in mode:
        raise ValueError(
            "Appending to an existing file is not supported, because that"
            " would involve an expensive `copy`-operation to a temporary"
            " file. Open the file in normal `w`-mode and copy explicitly"
            " if that's what you're after."
        )
    if "x" in mode:
        raise ValueError("Use the `overwrite`-parameter instead.")
    if "w" not in mode:
        raise ValueError("Atomic writes only make sense with `w`-mode.")

    # Atomic writes are more complicated. They work by opening a file
    # as a proxy in the same folder and then using the fdopen
    # functionality to wrap it in a Python file. Then we wrap it in an
    # atomic file that moves the file over on close.
    import errno
    import random

    try:
        existing_perm = os.stat(filename).st_mode
    except OSError:
        existing_perm = None

    open_flags = os.O_RDWR | os.O_CREAT | os.O_EXCL
    if binary:
        open_flags |= getattr(os, "O_BINARY", 0)

    create_perm = 0o666 if existing_perm is None else existing_perm
    target_dir = os.path.dirname(filename)

    while True:
        tmp_filename = os.path.join(
            target_dir,
            ".__atomic-write{:08x}".format(random.randrange(1 << 32)),
        )
        try:
            fd = os.open(tmp_filename, open_flags, create_perm)
        except OSError as e:
            # Pick another random name when this one is taken, or when the
            # Windows-specific EACCES-on-a-writable-directory case is hit;
            # anything else is a real error.
            if e.errno != errno.EEXIST and not (
                os.name == "nt"
                and e.errno == errno.EACCES
                and os.path.isdir(e.filename)
                and os.access(e.filename, os.W_OK)
            ):
                raise
        else:
            break

    if existing_perm is not None:
        os.chmod(tmp_filename, existing_perm)  # in case perm includes bits in umask

    wrapped = _wrap_io_open(fd, mode, encoding, errors)
    return _AtomicFile(wrapped, tmp_filename, os.path.realpath(filename)), True
# Used in a destructor call, needs extra protection from interpreter cleanup.
if hasattr(os, "replace"):
_replace = os.replace
_can_replace = True
else:
_replace = os.rename
_can_replace = not WIN
class _AtomicFile(object):
def __init__(self, f, tmp_filename, real_filename):
self._f = f
self._tmp_filename = tmp_filename
self._real_filename = real_filename
self.closed = False
@property
def name(self):
return self._real_filename
def close(self, delete=False):
if self.closed:
return
self._f.close()
if not _can_replace:
try:
os.remove(self._real_filename)
except OSError:
pass
_replace(self._tmp_filename, self._real_filename)
self.closed = True
def __getattr__(self, name):
return getattr(self._f, name)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, tb):
self.close(delete=exc_type is not None)
def __repr__(self):
return repr(self._f)
# Placeholders that stay None unless the Windows branch further down
# manages to import colorama and rebinds them.
auto_wrap_for_ansi = None
colorama = None
get_winterm_size = None
def strip_ansi(value):
    """Return ``value`` with every match of ``_ansi_re`` (ANSI escape
    sequences) removed."""
    return re.sub(_ansi_re, "", value)
def _is_jupyter_kernel_output(stream):
    """Tell whether ``stream`` ultimately belongs to an ``ipykernel.*``
    module.

    Returns ``None`` on Windows, where the check is not attempted.
    """
    if WIN:
        # TODO: Couldn't test on Windows, shouldn't try to support until
        # someone tests the details wrt colorama.
        return None

    # Peel off our own wrappers to reach the stream they were built on.
    innermost = stream
    while isinstance(innermost, (_FixupStream, _NonClosingTextIOWrapper)):
        innermost = innermost._stream

    return innermost.__class__.__module__.startswith("ipykernel.")
def should_strip_ansi(stream=None, color=None):
    """Decide whether ANSI codes should be stripped before writing.

    :param stream: stream that will receive the output
    :param color: explicit override; when not ``None`` the answer is simply
        ``not color``
    :return: with no override, ``True`` unless the stream is a TTY or a
        Jupyter kernel output stream
    """
    if color is not None:
        return not color

    # NOTE(review): a missing stream falls back to sys.stdin rather than
    # sys.stdout; kept as-is, confirm this is intended.
    target = sys.stdin if stream is None else stream
    return not (isatty(target) or _is_jupyter_kernel_output(target))
# If we're on Windows, we provide transparent integration through
# colorama. This will make ANSI colors through the echo function
# work automatically.
if WIN:
# Windows has a smaller terminal
DEFAULT_COLUMNS = 79
from ._winconsole import _get_windows_console_stream, _wrap_std_stream
# On Windows the argv encoding is the locale's preferred encoding.
def _get_argv_encoding():
import locale
return locale.getpreferredencoding()
if PY2:
# Python 2 replacement for raw_input: flushes stderr, writes the prompt
# to the default text stdout and reads one line from the default text
# stdin, returning it without trailing CR/LF characters.
def raw_input(prompt=""):
sys.stderr.flush()
if prompt:
stdout = _default_text_stdout()
stdout.write(prompt)
stdin = _default_text_stdin()
return stdin.readline().rstrip("\r\n")
# colorama is optional: without it the colorama-related names keep the
# values they had before this block.
try:
import colorama
except ImportError:
pass
else:
# Already-wrapped streams, keyed weakly by the original stream.
_ansi_stream_wrappers = WeakKeyDictionary()
def auto_wrap_for_ansi(stream, color=None):
"""This function wraps a stream so that calls through colorama
are issued to the win32 console API to recolor on demand. It
also ensures to reset the colors if a write call is interrupted
to not destroy the console afterwards.
"""
# The cache lookup is best-effort: any failure is treated as a miss.
try:
cached = _ansi_stream_wrappers.get(stream)
except Exception:
cached = None
if cached is not None:
return cached
strip = should_strip_ansi(stream, color)
ansi_wrapper = colorama.AnsiToWin32(stream, strip=strip)
rv = ansi_wrapper.stream
_write = rv.write
# Replacement write: on any exception (the bare except is deliberate)
# reset the console colors, then re-raise.
def _safe_write(s):
try:
return _write(s)
except:
ansi_wrapper.reset_all()
raise
rv.write = _safe_write
# Storing in the cache is best-effort as well.
try:
_ansi_stream_wrappers[stream] = rv
except Exception:
pass
return rv
# Console window size as (Right - Left, Bottom - Top) of the srWindow
# rectangle reported for STDOUT.
def get_winterm_size():
win = colorama.win32.GetConsoleScreenBufferInfo(
colorama.win32.STDOUT
).srWindow
return win.Right - win.Left, win.Bottom - win.Top
else:
# Elsewhere the argv encoding is stdin's encoding, falling back to the
# filesystem encoding.
def _get_argv_encoding():
return getattr(sys.stdin, "encoding", None) or get_filesystem_encoding()
# No-op stand-ins for the Windows console helpers: accept any arguments
# and return None.
_get_windows_console_stream = lambda *x: None
_wrap_std_stream = lambda *x: None
def term_len(x):
    """Length of ``x`` once ANSI escape sequences have been stripped."""
    visible_text = strip_ansi(x)
    return len(visible_text)
def isatty(stream):
    """Return ``stream.isatty()``, or ``False`` when that call fails for
    any reason (including a missing method)."""
    try:
        answer = stream.isatty()
    except Exception:
        return False
    return answer
def _make_cached_stream_func(src_func, wrapper_func):
cache = WeakKeyDictionary()
def func():
stream = src_func()
try:
rv = cache.get(stream)
except Exception:
rv = None
if rv is not None:
return rv
rv = wrapper_func()
try:
stream = src_func() # In case wrapper_func() modified the stream
cache[stream] = rv
except Exception:
pass
return rv
return func
# Cached getters for the text standard streams: each call looks up the
# current sys.std* object and reuses the wrapper built for it.
_default_text_stdin = _make_cached_stream_func(lambda: sys.stdin, get_text_stdin)
_default_text_stdout = _make_cached_stream_func(lambda: sys.stdout, get_text_stdout)
_default_text_stderr = _make_cached_stream_func(lambda: sys.stderr, get_text_stderr)
# Stream name -> factory returning the binary standard stream.
binary_streams = {
"stdin": get_binary_stdin,
"stdout": get_binary_stdout,
"stderr": get_binary_stderr,
}
# Stream name -> factory returning the text standard stream.
text_streams = {
"stdin": get_text_stdin,
"stdout": get_text_stdout,
"stderr": get_text_stderr,
}
================================================
FILE: metaflow/_vendor/click/_termui_impl.py
================================================
# -*- coding: utf-8 -*-
"""
This module contains implementations for the termui module. To keep the
import time of Click down, some infrequently used functionality is
placed in this module and only imported as needed.
"""
import contextlib
import math
import os
import sys
import time
from ._compat import _default_text_stdout
from ._compat import CYGWIN
from ._compat import get_best_encoding
from ._compat import int_types
from ._compat import isatty
from ._compat import open_stream
from ._compat import range_type
from ._compat import strip_ansi
from ._compat import term_len
from ._compat import WIN
from .exceptions import ClickException
from .utils import echo
# Control sequences written before and after a progress bar line. Outside
# "nt" the cursor is hidden (ESC[?25l) while the bar is drawn and shown
# again (ESC[?25h) afterwards.
BEFORE_BAR = "\r" if os.name == "nt" else "\r\033[?25l"
AFTER_BAR = "\n" if os.name == "nt" else "\033[?25h\n"
def _length_hint(obj):
"""Returns the length hint of an object."""
try:
return len(obj)
except (AttributeError, TypeError):
try:
get_hint = type(obj).__length_hint__
except AttributeError:
return None
try:
hint = get_hint(obj)
except TypeError:
return None
if hint is NotImplemented or not isinstance(hint, int_types) or hint < 0:
return None
return hint
class ProgressBar(object):
    """Terminal progress bar over an iterable (or a bare ``length``).

    The bar must be used as a context manager. Iterating it yields the
    items of the wrapped iterable and redraws the bar on ``file`` after
    each item; nothing is drawn when ``file`` is not a TTY.

    :param iterable: items to iterate over; may be ``None`` when ``length``
        is given
    :param length: number of items; taken from the iterable's length hint
        when omitted
    :param fill_char: character for the filled part of the bar
    :param empty_char: character for the unfilled part of the bar
    :param bar_template: ``%``-format template with the keys ``label``,
        ``bar`` and ``info``
    :param info_sep: separator between the info fields
    :param show_eta: show the estimated remaining time once it is known
    :param show_percent: show the percentage; ``None`` means "when the
        length is known and the position is not shown"
    :param show_pos: show the absolute position
    :param item_show_func: called with the current item; a non-``None``
        result is appended to the info fields
    :param label: text passed to the template as ``label``
    :param file: output stream, the default text stdout when ``None``
    :param color: forwarded to ``echo``
    :param width: bar width in characters; ``0`` selects automatic width
    :raises TypeError: when neither ``iterable`` nor ``length`` is given
    """

    def __init__(
        self,
        iterable,
        length=None,
        fill_char="#",
        empty_char=" ",
        bar_template="%(bar)s",
        info_sep=" ",
        show_eta=True,
        show_percent=None,
        show_pos=False,
        item_show_func=None,
        label=None,
        file=None,
        color=None,
        width=30,
    ):
        self.fill_char = fill_char
        self.empty_char = empty_char
        self.bar_template = bar_template
        self.info_sep = info_sep
        self.show_eta = show_eta
        self.show_percent = show_percent
        self.show_pos = show_pos
        self.item_show_func = item_show_func
        self.label = label or ""
        if file is None:
            file = _default_text_stdout()
        self.file = file
        self.color = color
        self.width = width
        self.autowidth = width == 0

        if length is None:
            length = _length_hint(iterable)
        if iterable is None:
            if length is None:
                raise TypeError("iterable or length is required")
            iterable = range_type(length)
        self.iter = iter(iterable)
        self.length = length
        self.length_known = length is not None
        self.pos = 0
        self.avg = []
        self.start = self.last_eta = time.time()
        self.eta_known = False
        self.finished = False
        self.max_width = None
        self.entered = False
        self.current_item = None
        self.is_hidden = not isatty(self.file)
        self._last_line = None
        # Bars that finish within this many seconds are never rendered.
        self.short_limit = 0.5

    def __enter__(self):
        self.entered = True
        self.render_progress()
        return self

    def __exit__(self, exc_type, exc_value, tb):
        self.render_finish()

    def __iter__(self):
        if not self.entered:
            raise RuntimeError("You need to use progress bars in a with block.")
        self.render_progress()
        return self.generator()

    def __next__(self):
        # Iteration is defined in terms of a generator function,
        # returned by iter(self); use that to define next(). This works
        # because `self.iter` is an iterable consumed by that generator,
        # so it is re-entry safe. Calling `next(self.generator())`
        # twice works and does "what you want".
        return next(iter(self))

    # Python 2 compat
    next = __next__

    def is_fast(self):
        """True while no more than ``short_limit`` seconds have elapsed
        since the bar was created."""
        return time.time() - self.start <= self.short_limit

    def render_finish(self):
        """Write the closing sequence, unless the bar is hidden or fast."""
        if self.is_hidden or self.is_fast():
            return
        self.file.write(AFTER_BAR)
        self.file.flush()

    @property
    def pct(self):
        """Completed fraction, capped at 1.0."""
        if self.finished:
            return 1.0
        return min(self.pos / (float(self.length) or 1), 1.0)

    @property
    def time_per_iteration(self):
        """Mean of the recorded per-step timings, 0.0 when there are none."""
        if not self.avg:
            return 0.0
        return sum(self.avg) / float(len(self.avg))

    @property
    def eta(self):
        """Estimated remaining seconds; 0.0 when the length is unknown or
        the bar has finished."""
        if self.length_known and not self.finished:
            return self.time_per_iteration * (self.length - self.pos)
        return 0.0

    def format_eta(self):
        """Format the ETA as ``[Nd ]HH:MM:SS``, or '' while it is unknown."""
        if self.eta_known:
            t = int(self.eta)
            seconds = t % 60
            t //= 60
            minutes = t % 60
            t //= 60
            hours = t % 24
            t //= 24
            if t > 0:
                return "{}d {:02}:{:02}:{:02}".format(t, hours, minutes, seconds)
            else:
                return "{:02}:{:02}:{:02}".format(hours, minutes, seconds)
        return ""

    def format_pos(self):
        """Format the position as ``pos`` or ``pos/length``."""
        pos = str(self.pos)
        if self.length_known:
            pos += "/{}".format(self.length)
        return pos

    def format_pct(self):
        """Format the completed percentage."""
        return "{: 4}%".format(int(self.pct * 100))[1:]

    def format_bar(self):
        """Render the bar itself.

        With a known length the bar fills proportionally. With an unknown
        length a single fill character moves back and forth until the bar
        is finished, at which point the bar is drawn full.
        """
        if self.length_known:
            bar_length = int(self.pct * self.width)
            bar = self.fill_char * bar_length
            bar += self.empty_char * (self.width - bar_length)
        elif self.finished:
            bar = self.fill_char * self.width
        else:
            bar = list(self.empty_char * (self.width or 1))
            if self.time_per_iteration != 0:
                marker = int(
                    (math.cos(self.pos * self.time_per_iteration) / 2.0 + 0.5)
                    * self.width
                )
                # The cosine term reaches exactly 1.0 (e.g. at pos == 0),
                # which would index one past the end; clamp to the last
                # cell instead of raising IndexError.
                bar[min(marker, len(bar) - 1)] = self.fill_char
            bar = "".join(bar)
        return bar

    def format_progress_line(self):
        """Assemble the full line (label, bar, info) from the template."""
        show_percent = self.show_percent

        info_bits = []
        if self.length_known and show_percent is None:
            show_percent = not self.show_pos

        if self.show_pos:
            info_bits.append(self.format_pos())
        if show_percent:
            info_bits.append(self.format_pct())
        if self.show_eta and self.eta_known and not self.finished:
            info_bits.append(self.format_eta())
        if self.item_show_func is not None:
            item_info = self.item_show_func(self.current_item)
            if item_info is not None:
                info_bits.append(item_info)

        return (
            self.bar_template
            % {
                "label": self.label,
                "bar": self.format_bar(),
                "info": self.info_sep.join(info_bits),
            }
        ).rstrip()

    def render_progress(self):
        """Redraw the bar if its text changed; hidden and fast bars are
        not drawn."""
        from .termui import get_terminal_size

        if self.is_hidden:
            return

        buf = []
        # Update width in case the terminal has been resized
        if self.autowidth:
            old_width = self.width
            self.width = 0
            # Measure everything except the bar to see how much room the
            # bar itself can take.
            clutter_length = term_len(self.format_progress_line())
            new_width = max(0, get_terminal_size()[0] - clutter_length)
            if new_width < old_width:
                buf.append(BEFORE_BAR)
                buf.append(" " * self.max_width)
                self.max_width = new_width
            self.width = new_width

        clear_width = self.width
        if self.max_width is not None:
            clear_width = self.max_width

        buf.append(BEFORE_BAR)
        line = self.format_progress_line()
        line_len = term_len(line)
        if self.max_width is None or self.max_width < line_len:
            self.max_width = line_len

        buf.append(line)
        # Pad with spaces to erase leftovers of a longer previous line.
        buf.append(" " * (clear_width - line_len))
        line = "".join(buf)
        # Render the line only if it changed.

        if line != self._last_line and not self.is_fast():
            self._last_line = line
            echo(line, file=self.file, color=self.color, nl=False)
            self.file.flush()

    def make_step(self, n_steps):
        """Advance the position by ``n_steps`` and refresh the timing
        statistics (at most once per second)."""
        self.pos += n_steps
        if self.length_known and self.pos >= self.length:
            self.finished = True

        if (time.time() - self.last_eta) < 1.0:
            return

        self.last_eta = time.time()

        # self.avg is a rolling list of length <= 7 of steps where steps are
        # defined as time elapsed divided by the total progress through
        # self.length.
        if self.pos:
            step = (time.time() - self.start) / self.pos
        else:
            step = time.time() - self.start

        self.avg = self.avg[-6:] + [step]

        self.eta_known = self.length_known

    def update(self, n_steps):
        """Advance the bar by ``n_steps`` and redraw it."""
        self.make_step(n_steps)
        self.render_progress()

    def finish(self):
        """Mark the bar as finished and clear the per-item state."""
        self.eta_known = False
        self.current_item = None
        self.finished = True

    def generator(self):
        """Return a generator which yields the items added to the bar
        during construction, and updates the progress bar *after* the
        yielded block returns.
        """
        # WARNING: the iterator interface for `ProgressBar` relies on
        # this and only works because this is a simple generator which
        # doesn't create or manage additional state. If this function
        # changes, the impact should be evaluated both against
        # `iter(bar)` and `next(bar)`. `next()` in particular may call
        # `self.generator()` repeatedly, and this must remain safe in
        # order for that interface to work.
        if not self.entered:
            raise RuntimeError("You need to use progress bars in a with block.")

        if self.is_hidden:
            for rv in self.iter:
                yield rv
        else:
            for rv in self.iter:
                self.current_item = rv
                yield rv
                self.update(1)
            self.finish()
            self.render_progress()
def pager(generator, color=None):
    """Decide what method to use for paging through text."""
    stdout = _default_text_stdout()

    # Paging only makes sense when both ends are interactive.
    if not (isatty(sys.stdin) and isatty(stdout)):
        return _nullpager(stdout, generator, color)

    # An explicitly configured pager wins.
    configured = (os.environ.get("PAGER", None) or "").strip()
    if configured:
        if WIN:
            return _tempfilepager(generator, configured, color)
        return _pipepager(generator, configured, color)

    if os.environ.get("TERM") in ("dumb", "emacs"):
        return _nullpager(stdout, generator, color)
    if WIN or sys.platform.startswith("os2"):
        return _tempfilepager(generator, "more <", color)

    can_run_shell = hasattr(os, "system")
    if can_run_shell and os.system("(less) 2>/dev/null") == 0:
        return _pipepager(generator, "less", color)

    # Last resort: see whether `more` can display an (empty) temp file.
    import tempfile

    fd, probe_path = tempfile.mkstemp()
    os.close(fd)
    try:
        if can_run_shell and os.system('more "{}"'.format(probe_path)) == 0:
            return _pipepager(generator, "more", color)
        return _nullpager(stdout, generator, color)
    finally:
        os.unlink(probe_path)
def _pipepager(generator, cmd, color):
    """Page through text by feeding it to another program. Invoking a
    pager through this might support colors.
    """
    import subprocess

    pager_env = dict(os.environ)

    # When piping to `less`, colors can be kept if less is (or can be)
    # told to pass raw control characters through: either no flags are set
    # at all, in which case LESS=-R is injected, or the existing flags
    # already contain an r/R.
    command_words = cmd.rsplit("/", 1)[-1].split()
    if color is None and command_words[0] == "less":
        less_flags = "{}{}".format(
            os.environ.get("LESS", ""), " ".join(command_words[1:])
        )
        if not less_flags:
            pager_env["LESS"] = "-R"
            color = True
        elif any(flag in less_flags for flag in ("r", "R")):
            color = True

    proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, env=pager_env)
    encoding = get_best_encoding(proc.stdin)
    try:
        for chunk in generator:
            if not color:
                chunk = strip_ansi(chunk)
            proc.stdin.write(chunk.encode(encoding, "replace"))
    except (IOError, KeyboardInterrupt):
        pass
    else:
        proc.stdin.close()

    # Less doesn't respect ^C, but catches it for its own UI purposes (aborting
    # search or other commands inside less).
    #
    # That means when the user hits ^C, the parent process (click) terminates,
    # but less is still alive, paging the output and messing up the terminal.
    #
    # If the user wants to make the pager exit on ^C, they should set
    # `LESS='-K'`. It's not our decision to make.
    pager_running = True
    while pager_running:
        try:
            proc.wait()
        except KeyboardInterrupt:
            continue
        pager_running = False
def _tempfilepager(generator, cmd, color):
    """Page through text by invoking a program on a temporary file.

    The whole output of ``generator`` is joined, optionally stripped of
    ANSI sequences (when ``color`` is falsy), encoded with the best
    encoding detected for ``sys.stdout`` and written to a temporary file.
    ``cmd`` is then run through ``os.system`` with the quoted file name
    appended.  The temporary file is always removed afterwards.
    """
    import tempfile

    # TODO: This never terminates if the passed generator never terminates.
    text = "".join(generator)
    if not color:
        text = strip_ansi(text)
    encoding = get_best_encoding(sys.stdout)

    # mkstemp() creates the file atomically, unlike the deprecated
    # mktemp(), which only returns a name that another process could
    # claim before we open it.
    fd, filename = tempfile.mkstemp()
    try:
        # Writing happens inside the try block so a failed write still
        # removes the temporary file.
        with os.fdopen(fd, "wb") as f:
            f.write(text.encode(encoding))
        os.system('{} "{}"'.format(cmd, filename))
    finally:
        os.unlink(filename)
def _nullpager(stream, generator, color):
"""Simply print unformatted text. This is the ultimate fallback."""
for text in generator:
if not color:
text = strip_ansi(text)
stream.write(text)
class Editor(object):
    """Runs an external text editor on a file or on a piece of text.

    :param editor: explicit editor command; when ``None`` one is detected
        by :meth:`get_editor`.
    :param env: optional mapping of extra environment variables for the
        editor process.
    :param require_save: when true, :meth:`edit` returns ``None`` if the
        file's modification time did not change.
    :param extension: suffix of the temporary file used by :meth:`edit`.
    """

    def __init__(self, editor=None, env=None, require_save=True, extension=".txt"):
        self.editor = editor
        self.env = env
        self.require_save = require_save
        self.extension = extension

    def get_editor(self):
        """Return the editor command to run.

        Order of precedence: the explicit ``editor`` attribute, the
        ``VISUAL`` then ``EDITOR`` environment variables, ``notepad`` on
        Windows, the first of ``sensible-editor``/``vim``/``nano`` found
        with ``which``, and finally ``vi``.
        """
        if self.editor is not None:
            return self.editor
        for key in "VISUAL", "EDITOR":
            rv = os.environ.get(key)
            if rv:
                return rv
        if WIN:
            return "notepad"
        for editor in "sensible-editor", "vim", "nano":
            if os.system("which {} >/dev/null 2>&1".format(editor)) == 0:
                return editor
        return "vi"

    def edit_file(self, filename):
        """Open ``filename`` in the editor and wait for it to exit.

        :raises ClickException: if the editor exits with a non-zero status
            or cannot be started.
        """
        import subprocess

        editor = self.get_editor()
        if self.env:
            environ = os.environ.copy()
            environ.update(self.env)
        else:
            environ = None
        try:
            c = subprocess.Popen(
                '{} "{}"'.format(editor, filename), env=environ, shell=True,
            )
            exit_code = c.wait()
            if exit_code != 0:
                raise ClickException("{}: Editing failed!".format(editor))
        except OSError as e:
            raise ClickException("{}: Editing failed: {}".format(editor, e))

    def edit(self, text):
        """Let the user edit ``text`` in a temporary file.

        Returns the edited text with ``\\r\\n`` normalized to ``\\n``, or
        ``None`` when ``require_save`` is set and the file's modification
        time is unchanged after the editor exits.  The temporary file is
        always removed.
        """
        import tempfile

        text = text or ""
        if text and not text.endswith("\n"):
            text += "\n"

        fd, name = tempfile.mkstemp(prefix="editor-", suffix=self.extension)
        try:
            if WIN:
                encoding = "utf-8-sig"
                text = text.replace("\n", "\r\n")
            else:
                encoding = "utf-8"

            # The context manager closes the descriptor even when encoding
            # or writing fails, so it cannot leak.
            with os.fdopen(fd, "wb") as f:
                f.write(text.encode(encoding))

            timestamp = os.path.getmtime(name)

            self.edit_file(name)

            if self.require_save and os.path.getmtime(name) == timestamp:
                return None

            with open(name, "rb") as f:
                rv = f.read()
            return rv.decode("utf-8-sig").replace("\r\n", "\n")
        finally:
            os.unlink(name)
def open_url(url, wait=False, locate=False):
    """Open ``url`` (or a ``file://`` path) with the platform's default handler.

    :param url: URL or file path to open.
    :param wait: wait for the launched program to exit where supported.
    :param locate: instead of opening the target, reveal it in a file
        manager (or open its containing directory).
    :return: the exit status of the launcher; ``0`` when the launch was
        started without waiting, ``1`` when no launcher could be used.
    """
    import subprocess

    def _unquote_file(url):
        # ``unquote`` lives in ``urllib.parse`` on Python 3 and directly in
        # ``urllib`` on Python 2.
        try:
            from urllib.parse import unquote
        except ImportError:
            from urllib import unquote
        if url.startswith("file://"):
            url = unquote(url[7:])
        return url

    if sys.platform == "darwin":
        args = ["open"]
        if wait:
            args.append("-W")
        if locate:
            args.append("-R")
        args.append(_unquote_file(url))
        null = open("/dev/null", "w")
        try:
            return subprocess.Popen(args, stderr=null).wait()
        finally:
            null.close()
    elif WIN:
        if locate:
            url = _unquote_file(url)
            args = 'explorer /select,"{}"'.format(_unquote_file(url.replace('"', "")))
        else:
            args = 'start {} "" "{}"'.format(
                "/WAIT" if wait else "", url.replace('"', "")
            )
        return os.system(args)
    elif CYGWIN:
        if locate:
            url = _unquote_file(url)
            args = 'cygstart "{}"'.format(os.path.dirname(url).replace('"', ""))
        else:
            args = 'cygstart {} "{}"'.format("-w" if wait else "", url.replace('"', ""))
        return os.system(args)

    try:
        if locate:
            url = os.path.dirname(_unquote_file(url)) or "."
        else:
            url = _unquote_file(url)
        c = subprocess.Popen(["xdg-open", url])
        if wait:
            return c.wait()
        return 0
    except OSError:
        # xdg-open could not be started: fall back to the webbrowser module
        # for plain web URLs when neither waiting nor locating was asked for.
        if url.startswith(("http://", "https://")) and not locate and not wait:
            import webbrowser

            webbrowser.open(url)
            return 0
        return 1
def _translate_ch_to_exc(ch):
if ch == u"\x03":
raise KeyboardInterrupt()
if ch == u"\x04" and not WIN: # Unix-like, Ctrl+D
raise EOFError()
if ch == u"\x1a" and WIN: # Windows, Ctrl+Z
raise EOFError()
if WIN:
    import msvcrt

    @contextlib.contextmanager
    def raw_terminal():
        """Context manager that does nothing on Windows and yields ``None``."""
        yield

    def getchar(echo):
        """Read one key press from the console, echoing it if ``echo`` is set."""
        # Why the wide-character readers are used:
        #
        # * ``getch`` returns bytes and, since Windows 10 build 1803, also
        #   returns \x00 on a second call after a regular key.  ``getwch``
        #   does not show this (probably buggy) behavior and returns
        #   Unicode, which is what we want.
        # * ``getch`` is limited to the current 8-bit codepage, so it cannot
        #   reliably read non-ASCII characters.
        #
        # Both families return \x00 or \xe0 to announce a special key; the
        # rest of the code is obtained by calling the same function again.
        # The Up arrow, for example, yields \xe0 followed by \x48.
        # Unfortunately \u00e0 is also "latin small letter a with grave",
        # so typing that character (e.g. on a French keyboard) produces the
        # same \xe0 and cannot be told apart from the start of a special
        # key sequence.
        #
        # We treat \xe0 as the start of a special-key sequence and read
        # once more.  The price is that after typing \u00e0 this function
        # only returns once a second character arrives.  Returning
        # immediately instead would break cross-platform handling of arrow
        # keys and other keys starting with \xe0.
        #
        # Click does not claim to get this entirely right; ``getwch`` simply
        # does the right thing in more situations than ``getch``.
        reader = msvcrt.getwche if echo else msvcrt.getwch
        key = reader()
        if key in (u"\x00", u"\xe0"):
            # Marker for a special key (see above): fetch the second half.
            key += reader()
        _translate_ch_to_exc(key)
        return key


else:
    import tty
    import termios

    @contextlib.contextmanager
    def raw_terminal():
        """Put the terminal into raw mode and yield its file descriptor.

        ``/dev/tty`` is opened when ``sys.stdin`` is not a terminal.  The
        previous terminal settings are restored on exit, and
        ``termios.error`` is suppressed.
        """
        if isatty(sys.stdin):
            fd = sys.stdin.fileno()
            tty_file = None
        else:
            tty_file = open("/dev/tty")
            fd = tty_file.fileno()
        try:
            saved_settings = termios.tcgetattr(fd)
            try:
                tty.setraw(fd)
                yield fd
            finally:
                termios.tcsetattr(fd, termios.TCSADRAIN, saved_settings)
                sys.stdout.flush()
                if tty_file is not None:
                    tty_file.close()
        except termios.error:
            pass

    def getchar(echo):
        """Read one key press (up to 32 bytes) from the raw terminal."""
        with raw_terminal() as fd:
            raw = os.read(fd, 32)
            key = raw.decode(get_best_encoding(sys.stdin), "replace")
            if echo and isatty(sys.stdout):
                sys.stdout.write(key)
            _translate_ch_to_exc(key)
            return key
================================================
FILE: metaflow/_vendor/click/_textwrap.py
================================================
import textwrap
from contextlib import contextmanager
class TextWrapper(textwrap.TextWrapper):
    """``textwrap.TextWrapper`` with extra indentation helpers."""

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """Deal with a chunk that does not fit on the current line."""
        room = max(width - cur_len, 1)

        if not self.break_long_words:
            # Not allowed to split: only place the word on an empty line.
            if not cur_line:
                cur_line.append(reversed_chunks.pop())
            return

        # Fill the rest of the line and leave the remainder on the stack.
        chunk = reversed_chunks[-1]
        cur_line.append(chunk[:room])
        reversed_chunks[-1] = chunk[room:]

    @contextmanager
    def extra_indent(self, indent):
        """Temporarily append ``indent`` to both indent prefixes."""
        saved = (self.initial_indent, self.subsequent_indent)
        self.initial_indent += indent
        self.subsequent_indent += indent
        try:
            yield
        finally:
            self.initial_indent, self.subsequent_indent = saved

    def indent_only(self, text):
        """Prefix each line of ``text`` with the indents, without wrapping."""
        prefixed = [
            (self.initial_indent if position == 0 else self.subsequent_indent) + line
            for position, line in enumerate(text.splitlines())
        ]
        return "\n".join(prefixed)
================================================
FILE: metaflow/_vendor/click/_unicodefun.py
================================================
import codecs
import os
import sys
from ._compat import PY2
def _find_unicode_literals_frame():
import __future__
if not hasattr(sys, "_getframe"): # not all Python implementations have it
return 0
frm = sys._getframe(1)
idx = 1
while frm is not None:
if frm.f_globals.get("__name__", "").startswith("click."):
frm = frm.f_back
idx += 1
elif frm.f_code.co_flags & __future__.unicode_literals.compiler_flag:
return idx
else:
break
return 0
def _check_for_unicode_literals():
    """Warn when calling code uses ``unicode_literals`` (Python 2 only).

    Nothing happens when ``__debug__`` is off, when not running on Python 2
    or when the package-level ``disable_unicode_literals_warning`` flag is
    set.
    """
    if not __debug__:
        return

    from . import disable_unicode_literals_warning

    if PY2 and not disable_unicode_literals_warning:
        frame_depth = _find_unicode_literals_frame()
        if frame_depth > 0:
            import warnings

            message = (
                "Click detected the use of the unicode_literals __future__"
                " import. This is heavily discouraged because it can"
                " introduce subtle bugs in your code. You should instead"
                ' use explicit u"" literals for your unicode strings. For'
                " more information see"
                " https://click.palletsprojects.com/python3/"
            )
            # Attribute the warning to the offending frame.
            warnings.warn(Warning(message), stacklevel=frame_depth)
def _verify_python3_env():
    """Abort when Python 3 runs with an ASCII-only environment encoding.

    Returns silently on Python 2 or when the preferred encoding is not
    ASCII.  Otherwise raises ``RuntimeError``; on POSIX systems the
    message is extended with hints derived from ``locale -a`` and from the
    ``LC_ALL``/``LANG`` environment variables.
    """
    if PY2:
        return

    try:
        import locale as locale_module

        fs_enc = codecs.lookup(locale_module.getpreferredencoding()).name
    except Exception:
        fs_enc = "ascii"
    if fs_enc != "ascii":
        return

    utf8_suffixes = (".utf-8", ".utf8")
    hints = []

    if os.name == "posix":
        import subprocess

        try:
            proc = subprocess.Popen(
                ["locale", "-a"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )
            listing = proc.communicate()[0]
        except OSError:
            listing = b""

        # Make sure we're operating on text here.
        if isinstance(listing, bytes):
            listing = listing.decode("ascii", "replace")

        good_locales = set()
        has_c_utf8 = False
        for raw_line in listing.splitlines():
            candidate = raw_line.strip()
            lowered = candidate.lower()
            if lowered.endswith(utf8_suffixes):
                good_locales.add(candidate)
                if lowered in ("c.utf8", "c.utf-8"):
                    has_c_utf8 = True

        hints.append("\n\n")
        if not good_locales:
            hints.append(
                "Additional information: on this system no suitable"
                " UTF-8 locales were discovered. This most likely"
                " requires resolving by reconfiguring the locale"
                " system."
            )
        elif has_c_utf8:
            hints.append(
                "This system supports the C.UTF-8 locale which is"
                " recommended. You might be able to resolve your issue"
                " by exporting the following environment variables:\n\n"
                " export LC_ALL=C.UTF-8\n"
                " export LANG=C.UTF-8"
            )
        else:
            hints.append(
                "This system lists a couple of UTF-8 supporting locales"
                " that you can pick from. The following suitable"
                " locales were discovered: {}".format(", ".join(sorted(good_locales)))
            )

        # Only the first variable that is set at all is considered.
        bad_locale = None
        for exported in (os.environ.get("LC_ALL"), os.environ.get("LANG")):
            if exported and exported.lower().endswith(utf8_suffixes):
                bad_locale = exported
            if exported is not None:
                break
        if bad_locale is not None:
            hints.append(
                "\n\nClick discovered that you exported a UTF-8 locale"
                " but the locale system could not pick up from it"
                " because it does not exist. The exported locale is"
                " '{}' but it is not supported".format(bad_locale)
            )

    raise RuntimeError(
        "Click will abort further execution because Python 3 was"
        " configured to use ASCII as encoding for the environment."
        " Consult https://click.palletsprojects.com/python3/ for"
        " mitigation steps.{}".format("".join(hints))
    )
================================================
FILE: metaflow/_vendor/click/_winconsole.py
================================================
# -*- coding: utf-8 -*-
# This module is based on the excellent work by Adam Bartoš who
# provided a lot of what went into the implementation here in
# the discussion to issue1602 in the Python bug tracker.
#
# There are some general differences in regards to how this works
# compared to the original patches as we do not need to patch
# the entire interpreter but just work in our little world of
# echo and prompt.
import ctypes
import io
import os
import sys
import time
import zlib
from ctypes import byref
from ctypes import c_char
from ctypes import c_char_p
from ctypes import c_int
from ctypes import c_ssize_t
from ctypes import c_ulong
from ctypes import c_void_p
from ctypes import POINTER
from ctypes import py_object
from ctypes import windll
from ctypes import WinError
from ctypes import WINFUNCTYPE
from ctypes.wintypes import DWORD
from ctypes.wintypes import HANDLE
from ctypes.wintypes import LPCWSTR
from ctypes.wintypes import LPWSTR
import msvcrt
from ._compat import _NonClosingTextIOWrapper
from ._compat import PY2
from ._compat import text_type
try:
    from ctypes import pythonapi
except ImportError:
    # No C API access on this interpreter; buffer helpers are unavailable.
    pythonapi = None
else:
    PyObject_GetBuffer = pythonapi.PyObject_GetBuffer
    PyBuffer_Release = pythonapi.PyBuffer_Release
# Pointer-to-ssize_t type, used for the shape/strides/suboffsets fields of
# the Py_buffer structure.
c_ssize_p = POINTER(c_ssize_t)
# Console-related functions looked up on kernel32.
kernel32 = windll.kernel32
GetStdHandle = kernel32.GetStdHandle
ReadConsoleW = kernel32.ReadConsoleW
WriteConsoleW = kernel32.WriteConsoleW
GetConsoleMode = kernel32.GetConsoleMode
GetLastError = kernel32.GetLastError
# Function prototypes bound by export name to kernel32 / shell32.
GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(
("CommandLineToArgvW", windll.shell32)
)
LocalFree = WINFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p)(
("LocalFree", windll.kernel32)
)
# Handles of the three standard streams, fetched once at import time.
STDIN_HANDLE = GetStdHandle(-10)
STDOUT_HANDLE = GetStdHandle(-11)
STDERR_HANDLE = GetStdHandle(-12)
# Flags passed to PyObject_GetBuffer by get_buffer().
PyBUF_SIMPLE = 0
PyBUF_WRITABLE = 1
# Windows error codes compared against GetLastError() results.
ERROR_SUCCESS = 0
ERROR_NOT_ENOUGH_MEMORY = 8
ERROR_OPERATION_ABORTED = 995
# File descriptor numbers of the standard streams.
STDIN_FILENO = 0
STDOUT_FILENO = 1
STDERR_FILENO = 2
# Byte (Ctrl+Z) that the console reader treats as end of input when it is
# the first byte read.
EOF = b"\x1a"
# Upper bound on the number of bytes handed to a single WriteConsoleW call.
MAX_BYTES_WRITTEN = 32767
class Py_buffer(ctypes.Structure):
    """ctypes structure filled in by ``PyObject_GetBuffer``.

    On Python 2 an additional ``smalltable`` field sits immediately before
    the final ``internal`` field.
    """

    _fields_ = (
        [
            ("buf", c_void_p),
            ("obj", py_object),
            ("len", c_ssize_t),
            ("itemsize", c_ssize_t),
            ("readonly", c_int),
            ("ndim", c_int),
            ("format", c_char_p),
            ("shape", c_ssize_p),
            ("strides", c_ssize_p),
            ("suboffsets", c_ssize_p),
        ]
        + ([("smalltable", c_ssize_t * 2)] if PY2 else [])
        + [("internal", c_void_p)]
    )
# On PyPy we cannot get buffers so our ability to operate here is
# severely limited.
if pythonapi is not None:

    def get_buffer(obj, writable=False):
        """Return a ctypes ``c_char`` array mapped onto ``obj``'s buffer.

        A writable buffer is requested when ``writable`` is true.  The
        buffer view is released before the function returns.
        """
        view = Py_buffer()
        flags = PyBUF_SIMPLE
        if writable:
            flags = PyBUF_WRITABLE
        PyObject_GetBuffer(py_object(obj), byref(view), flags)
        try:
            return (c_char * view.len).from_address(view.buf)
        finally:
            PyBuffer_Release(byref(view))


else:
    get_buffer = None
class _WindowsConsoleRawIOBase(io.RawIOBase):
def __init__(self, handle):
self.handle = handle
def isatty(self):
io.RawIOBase.isatty(self)
return True
class _WindowsConsoleReader(_WindowsConsoleRawIOBase):
    """Raw stream that reads UTF-16-LE data from the console handle."""

    def readable(self):
        return True

    def readinto(self, b):
        """Fill ``b`` via ``ReadConsoleW`` and return the byte count read.

        Returns ``0`` for an empty buffer or when the first byte read is
        the EOF marker.  Raises ``ValueError`` for an odd-sized buffer and
        ``OSError`` when the console read fails.
        """
        requested = len(b)
        if requested == 0:
            return 0
        if requested % 2:
            raise ValueError(
                "cannot read odd number of bytes from UTF-16-LE encoded console"
            )

        target = get_buffer(b, writable=True)
        units_read = c_ulong()
        succeeded = ReadConsoleW(
            HANDLE(self.handle),
            target,
            requested // 2,
            byref(units_read),
            None,
        )
        if GetLastError() == ERROR_OPERATION_ABORTED:
            # wait for KeyboardInterrupt
            time.sleep(0.1)
        if not succeeded:
            raise OSError("Windows error: {}".format(GetLastError()))

        if target[0] == EOF:
            return 0
        return 2 * units_read.value
class _WindowsConsoleWriter(_WindowsConsoleRawIOBase):
    """Raw stream that writes UTF-16-LE data to the console handle."""

    def writable(self):
        return True

    @staticmethod
    def _get_error_message(errno):
        """Return a symbolic name for known error codes, else a generic text."""
        known_codes = (
            (ERROR_SUCCESS, "ERROR_SUCCESS"),
            (ERROR_NOT_ENOUGH_MEMORY, "ERROR_NOT_ENOUGH_MEMORY"),
        )
        for code, label in known_codes:
            if errno == code:
                return label
        return "Windows error {}".format(errno)

    def write(self, b):
        """Write ``b`` via ``WriteConsoleW`` and return the bytes written.

        At most ``MAX_BYTES_WRITTEN`` bytes are submitted per call.  Raises
        ``OSError`` when data was supplied but nothing was written.
        """
        total = len(b)
        source = get_buffer(b)
        units_to_write = min(total, MAX_BYTES_WRITTEN) // 2
        units_written = c_ulong()

        WriteConsoleW(
            HANDLE(self.handle),
            source,
            units_to_write,
            byref(units_written),
            None,
        )

        written = 2 * units_written.value
        if total > 0 and written == 0:
            raise OSError(self._get_error_message(GetLastError()))
        return written
class ConsoleStream(object):
    """Pairs a text stream with its underlying byte stream.

    Text is written through the text stream, anything else through the
    byte stream (exposed as ``buffer``).  Unknown attributes are looked up
    on the text stream.
    """

    def __init__(self, text_stream, byte_stream):
        self._text_stream = text_stream
        self.buffer = byte_stream

    @property
    def name(self):
        return self.buffer.name

    def write(self, x):
        """Write ``x`` to the stream matching its type."""
        if not isinstance(x, text_type):
            # Best-effort flush of pending text before writing raw bytes.
            try:
                self.flush()
            except Exception:
                pass
            return self.buffer.write(x)
        return self._text_stream.write(x)

    def writelines(self, lines):
        for item in lines:
            self.write(item)

    def __getattr__(self, name):
        return getattr(self._text_stream, name)

    def isatty(self):
        return self.buffer.isatty()
def __repr__(self):
return " # pre-release
[-_\.]?
(?P(a|b|c|rc|alpha|beta|pre|preview))
[-_\.]?
(?P[0-9]+)?
)?
(?P # post release
(?:-(?P[0-9]+))
|
(?:
[-_\.]?
(?Ppost|rev|r)
[-_\.]?
(?P[0-9]+)?
)
)?
(?P # dev release
[-_\.]?
(?Pdev)
[-_\.]?
(?P[0-9]+)?
)?
)
(?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
"""
# Public name for the private ``_VERSION_PATTERN`` string; the bare string
# that follows documents it.
VERSION_PATTERN = _VERSION_PATTERN
"""
A string containing the regular expression used to match a valid version.
The pattern is not anchored at either end, and is intended for embedding in larger
expressions (for example, matching a version number as part of a file name). The
regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
flags set.
:meta hide-value:
"""
class Version(_BaseVersion):
    """This class abstracts handling of a project's versions.

    A :class:`Version` instance is comparison aware and can be compared and
    sorted using the standard Python interfaces.

    >>> v1 = Version("1.0a5")
    >>> v2 = Version("1.0")
    >>> v1
    <Version('1.0a5')>
    >>> v2
    <Version('1.0')>
    >>> v1 < v2
    True
    >>> v1 == v2
    False
    >>> v1 > v2
    False
    >>> v1 >= v2
    False
    >>> v1 <= v2
    True
    """

    # The pattern is anchored here and tolerates surrounding whitespace.
    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)

    def __init__(self, version: str) -> None:
        """Initialize a Version object.

        :param version:
            The string representation of a version which will be parsed and normalized
            before use.
        :raises InvalidVersion:
            If the ``version`` does not conform to PEP 440 in any way then this
            exception will be raised.
        """
        # Validate the version and parse it into pieces
        match = self._regex.search(version)
        if not match:
            raise InvalidVersion(f"Invalid version: '{version}'")

        # Store the parsed out pieces of the version
        self._version = _Version(
            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
            release=tuple(int(i) for i in match.group("release").split(".")),
            pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
            post=_parse_letter_version(
                match.group("post_l"), match.group("post_n1") or match.group("post_n2")
            ),
            dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
            local=_parse_local_version(match.group("local")),
        )

        # Generate a key which will be used for sorting
        self._key = _cmpkey(
            self._version.epoch,
            self._version.release,
            self._version.pre,
            self._version.post,
            self._version.dev,
            self._version.local,
        )

    def __repr__(self) -> str:
        """A representation of the Version that shows all internal state.

        >>> Version('1.0.0')
        <Version('1.0.0')>
        """
        return f"<Version('{self}')>"

    def __str__(self) -> str:
        """A string representation of the version that can be round-tripped.

        >>> str(Version("1.0a5"))
        '1.0a5'
        """
        parts = []

        # Epoch
        if self.epoch != 0:
            parts.append(f"{self.epoch}!")

        # Release segment
        parts.append(".".join(str(x) for x in self.release))

        # Pre-release
        if self.pre is not None:
            parts.append("".join(str(x) for x in self.pre))

        # Post-release
        if self.post is not None:
            parts.append(f".post{self.post}")

        # Development release
        if self.dev is not None:
            parts.append(f".dev{self.dev}")

        # Local version segment
        if self.local is not None:
            parts.append(f"+{self.local}")

        return "".join(parts)

    @property
    def epoch(self) -> int:
        """The epoch of the version.

        >>> Version("2.0.0").epoch
        0
        >>> Version("1!2.0.0").epoch
        1
        """
        _epoch: int = self._version.epoch
        return _epoch

    @property
    def release(self) -> Tuple[int, ...]:
        """The components of the "release" segment of the version.

        >>> Version("1.2.3").release
        (1, 2, 3)
        >>> Version("2.0.0").release
        (2, 0, 0)
        >>> Version("1!2.0.0.post0").release
        (2, 0, 0)

        Includes trailing zeroes but not the epoch or any pre-release / development /
        post-release suffixes.
        """
        _release: Tuple[int, ...] = self._version.release
        return _release

    @property
    def pre(self) -> Optional[Tuple[str, int]]:
        """The pre-release segment of the version.

        >>> print(Version("1.2.3").pre)
        None
        >>> Version("1.2.3a1").pre
        ('a', 1)
        >>> Version("1.2.3b1").pre
        ('b', 1)
        >>> Version("1.2.3rc1").pre
        ('rc', 1)
        """
        _pre: Optional[Tuple[str, int]] = self._version.pre
        return _pre

    @property
    def post(self) -> Optional[int]:
        """The post-release number of the version.

        >>> print(Version("1.2.3").post)
        None
        >>> Version("1.2.3.post1").post
        1
        """
        return self._version.post[1] if self._version.post else None

    @property
    def dev(self) -> Optional[int]:
        """The development number of the version.

        >>> print(Version("1.2.3").dev)
        None
        >>> Version("1.2.3.dev1").dev
        1
        """
        return self._version.dev[1] if self._version.dev else None

    @property
    def local(self) -> Optional[str]:
        """The local version segment of the version.

        >>> print(Version("1.2.3").local)
        None
        >>> Version("1.2.3+abc").local
        'abc'
        """
        if self._version.local:
            return ".".join(str(x) for x in self._version.local)
        else:
            return None

    @property
    def public(self) -> str:
        """The public portion of the version.

        >>> Version("1.2.3").public
        '1.2.3'
        >>> Version("1.2.3+abc").public
        '1.2.3'
        >>> Version("1.2.3+abc.dev1").public
        '1.2.3'
        """
        return str(self).split("+", 1)[0]

    @property
    def base_version(self) -> str:
        """The "base version" of the version.

        >>> Version("1.2.3").base_version
        '1.2.3'
        >>> Version("1.2.3+abc").base_version
        '1.2.3'
        >>> Version("1!1.2.3+abc.dev1").base_version
        '1!1.2.3'

        The "base version" is the public version of the project without any pre or post
        release markers.
        """
        parts = []

        # Epoch
        if self.epoch != 0:
            parts.append(f"{self.epoch}!")

        # Release segment
        parts.append(".".join(str(x) for x in self.release))

        return "".join(parts)

    @property
    def is_prerelease(self) -> bool:
        """Whether this version is a pre-release.

        >>> Version("1.2.3").is_prerelease
        False
        >>> Version("1.2.3a1").is_prerelease
        True
        >>> Version("1.2.3b1").is_prerelease
        True
        >>> Version("1.2.3rc1").is_prerelease
        True
        >>> Version("1.2.3dev1").is_prerelease
        True
        """
        return self.dev is not None or self.pre is not None

    @property
    def is_postrelease(self) -> bool:
        """Whether this version is a post-release.

        >>> Version("1.2.3").is_postrelease
        False
        >>> Version("1.2.3.post1").is_postrelease
        True
        """
        return self.post is not None

    @property
    def is_devrelease(self) -> bool:
        """Whether this version is a development release.

        >>> Version("1.2.3").is_devrelease
        False
        >>> Version("1.2.3.dev1").is_devrelease
        True
        """
        return self.dev is not None

    @property
    def major(self) -> int:
        """The first item of :attr:`release` or ``0`` if unavailable.

        >>> Version("1.2.3").major
        1
        """
        return self.release[0] if len(self.release) >= 1 else 0

    @property
    def minor(self) -> int:
        """The second item of :attr:`release` or ``0`` if unavailable.

        >>> Version("1.2.3").minor
        2
        >>> Version("1").minor
        0
        """
        return self.release[1] if len(self.release) >= 2 else 0

    @property
    def micro(self) -> int:
        """The third item of :attr:`release` or ``0`` if unavailable.

        >>> Version("1.2.3").micro
        3
        >>> Version("1").micro
        0
        """
        return self.release[2] if len(self.release) >= 3 else 0
def _parse_letter_version(
letter: str, number: Union[str, bytes, SupportsInt]
) -> Optional[Tuple[str, int]]:
if letter:
# We consider there to be an implicit 0 in a pre-release if there is
# not a numeral associated with it.
if number is None:
number = 0
# We normalize any letters to their lower case form
letter = letter.lower()
# We consider some words to be alternate spellings of other words and
# in those cases we want to normalize the spellings to our preferred
# spelling.
if letter == "alpha":
letter = "a"
elif letter == "beta":
letter = "b"
elif letter in ["c", "pre", "preview"]:
letter = "rc"
elif letter in ["rev", "r"]:
letter = "post"
return letter, int(number)
if not letter and number:
# We assume if we are given a number, but we are not given a letter
# then this is using the implicit post release syntax (e.g. 1.0-1)
letter = "post"
return letter, int(number)
return None
_local_version_separators = re.compile(r"[\._-]")


def _parse_local_version(local: str) -> Optional[LocalType]:
    """
    Split a local version such as abc.1.twelve into ("abc", 1, "twelve").

    Purely numeric parts become integers, all other parts are lower-cased.
    ``None`` is passed through unchanged.
    """
    if local is None:
        return None

    segments = []
    for part in _local_version_separators.split(local):
        segments.append(int(part) if part.isdigit() else part.lower())
    return tuple(segments)
def _cmpkey(
    epoch: int,
    release: Tuple[int, ...],
    pre: Optional[Tuple[str, int]],
    post: Optional[Tuple[str, int]],
    dev: Optional[Tuple[str, int]],
    local: Optional[Tuple[SubLocalType]],
) -> CmpKey:
    """Build the tuple that versions are ordered by."""
    # Release segments compare with trailing zeros ignored, so strip them
    # from the end before freezing the result into a tuple.
    significant = list(release)
    while significant and significant[-1] == 0:
        significant.pop()
    _release = tuple(significant)

    _pre: PrePostDevType
    if pre is not None:
        _pre = pre
    elif post is None and dev is not None:
        # Trick the ordering into placing 1.0.dev0 before 1.0a0 by abusing
        # the pre segment.  Only done when there is neither a pre nor a post
        # segment; otherwise the normal rules already sort correctly.
        _pre = NegativeInfinity
    else:
        # Versions without a pre-release sort after those with one.
        _pre = Infinity

    # Versions without a post segment sort before those with one.
    _post: PrePostDevType = NegativeInfinity if post is None else post

    # Versions without a development segment sort after those with one.
    _dev: PrePostDevType = Infinity if dev is None else dev

    _local: LocalType
    if local is None:
        # Versions without a local segment sort before those with one.
        _local = NegativeInfinity
    else:
        # PEP 440 ordering of local segments:
        # - alphanumeric segments sort before numeric segments
        # - alphanumeric segments sort lexicographically
        # - numeric segments sort numerically
        # - shorter versions sort before longer ones when the prefixes
        #   match exactly
        _local = tuple(
            (part, "") if isinstance(part, int) else (NegativeInfinity, part)
            for part in local
        )

    return epoch, _release, _pre, _post, _dev, _local
================================================
FILE: metaflow/_vendor/packaging.LICENSE
================================================
This software is made available under the terms of *either* of the licenses
found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made
under the terms of *both* these licenses.
================================================
FILE: metaflow/_vendor/packaging.LICENSE.APACHE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
================================================
FILE: metaflow/_vendor/packaging.LICENSE.BSD
================================================
Copyright (c) Donald Stufft and individual contributors.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: metaflow/_vendor/pip.LICENSE
================================================
Copyright (c) 2008-present The pip developers (see AUTHORS.txt file)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: metaflow/_vendor/standard-imghdr.LICENSE
================================================
Copyright © 2001-2023 Python Software Foundation; All Rights Reserved
This code originally taken from the Python 3.11.3 distribution
and it is therefore now released under the following Python-style
license:
1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and
the Individual or Organization ("Licensee") accessing and
otherwise using nntplib software in source or binary form and
its associated documentation.
2. Subject to the terms and conditions of this License Agreement, PSF hereby
grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
analyze, test, perform and/or display publicly, prepare derivative works,
distribute, and otherwise use nntplib alone or in any derivative
version, provided, however, that PSF's License Agreement and PSF's notice of
copyright, i.e., "Copyright © 2001-2023 Python Software Foundation; All Rights
Reserved" are retained in nntplib alone or in any derivative version
prepared by Licensee.
3. In the event Licensee prepares a derivative work that is based on or
incorporates nntplib or any part thereof, and wants to make the
derivative work available to others as provided herein, then Licensee hereby
agrees to include in any such work a brief summary of the
changes made to nntplib.
4. PSF is making nntplib available to Licensee on an "AS IS" basis.
PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF
EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR
WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE
USE OF NNTPLIB WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF NNTPLIB
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF
MODIFYING, DISTRIBUTING, OR OTHERWISE USING NNTPLIB, OR ANY DERIVATIVE
THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material breach of
its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any relationship
of agency, partnership, or joint venture between PSF and Licensee. This License
Agreement does not grant permission to use PSF trademarks or trade name in a
trademark sense to endorse or promote products or services of Licensee, or any
third party.
8. By copying, installing or otherwise using nntplib, Licensee agrees
to be bound by the terms and conditions of this License Agreement.
================================================
FILE: metaflow/_vendor/typeguard/__init__.py
================================================
import os
from typing import Any
from ._checkers import TypeCheckerCallable as TypeCheckerCallable
from ._checkers import TypeCheckLookupCallback as TypeCheckLookupCallback
from ._checkers import check_type_internal as check_type_internal
from ._checkers import checker_lookup_functions as checker_lookup_functions
from ._checkers import load_plugins as load_plugins
from ._config import CollectionCheckStrategy as CollectionCheckStrategy
from ._config import ForwardRefPolicy as ForwardRefPolicy
from ._config import TypeCheckConfiguration as TypeCheckConfiguration
from ._decorators import typechecked as typechecked
from ._decorators import typeguard_ignore as typeguard_ignore
from ._exceptions import InstrumentationWarning as InstrumentationWarning
from ._exceptions import TypeCheckError as TypeCheckError
from ._exceptions import TypeCheckWarning as TypeCheckWarning
from ._exceptions import TypeHintWarning as TypeHintWarning
from ._functions import TypeCheckFailCallback as TypeCheckFailCallback
from ._functions import check_type as check_type
from ._functions import warn_on_error as warn_on_error
from ._importhook import ImportHookManager as ImportHookManager
from ._importhook import TypeguardFinder as TypeguardFinder
from ._importhook import install_import_hook as install_import_hook
from ._memo import TypeCheckMemo as TypeCheckMemo
from ._suppression import suppress_type_checks as suppress_type_checks
from ._utils import Unset as Unset
# Make every name imported above from one of this package's submodules report
# the package itself as its defining module. A snapshot of the namespace is
# iterated because the loop variable is itself added to it.
for value in [*locals().values()]:
    if not getattr(value, "__module__", "").startswith(f"{__name__}."):
        continue

    value.__module__ = __name__
config: TypeCheckConfiguration


def __getattr__(name: str) -> Any:
    """
    Module-level attribute hook that resolves ``config`` on first access.

    :param name: the attribute being looked up on this module
    :return: the ``global_config`` object from ``._config`` when ``name`` is
        ``"config"``
    :raises AttributeError: for any other name
    """
    if name != "config":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Imported here rather than at module level, so it only happens on demand
    from ._config import global_config

    return global_config
# Checker lookup plugins are loaded at import time. Defining the
# TYPEGUARD_DISABLE_PLUGIN_AUTOLOAD environment variable (with any value)
# turns this off.
if os.environ.get("TYPEGUARD_DISABLE_PLUGIN_AUTOLOAD") is None:
    load_plugins()
================================================
FILE: metaflow/_vendor/typeguard/_checkers.py
================================================
from __future__ import annotations
import collections.abc
import inspect
import sys
import types
import typing
import warnings
from enum import Enum
from inspect import Parameter, isclass, isfunction
from io import BufferedIOBase, IOBase, RawIOBase, TextIOBase
from itertools import zip_longest
from textwrap import indent
from typing import (
IO,
AbstractSet,
Any,
BinaryIO,
Callable,
Dict,
ForwardRef,
List,
Mapping,
MutableMapping,
NewType,
Optional,
Sequence,
Set,
TextIO,
Tuple,
Type,
TypeVar,
Union,
)
from unittest.mock import Mock
from metaflow._vendor import typing_extensions
# Must use this because typing.is_typeddict does not recognize
# TypedDict from typing_extensions, and as of version 4.12.0
# typing_extensions.TypedDict is different from typing.TypedDict
# on all versions.
from metaflow._vendor.typing_extensions import is_typeddict
from ._config import ForwardRefPolicy
from ._exceptions import TypeCheckError, TypeHintWarning
from ._memo import TypeCheckMemo
from ._utils import evaluate_forwardref, get_stacklevel, get_type_name, qualified_name
if sys.version_info >= (3, 11):
from typing import (
Annotated,
NotRequired,
TypeAlias,
get_args,
get_origin,
)
SubclassableAny = Any
else:
from metaflow._vendor.typing_extensions import (
Annotated,
NotRequired,
TypeAlias,
get_args,
get_origin,
)
from metaflow._vendor.typing_extensions import Any as SubclassableAny
if sys.version_info >= (3, 10):
from importlib.metadata import entry_points
from typing import ParamSpec
else:
from metaflow._vendor.importlib_metadata import entry_points
from metaflow._vendor.typing_extensions import ParamSpec
# Signature shared by the check_*() functions in this module:
# (value, origin_type, args, memo).
TypeCheckerCallable: TypeAlias = Callable[
[Any, Any, Tuple[Any, ...], TypeCheckMemo], Any
]
# A lookup callback receives three positional values and returns a checker
# callable, or None.
# NOTE(review): the meaning of the three parameters is not visible in this part of
# the file - confirm against the code that calls checker_lookup_functions.
TypeCheckLookupCallback: TypeAlias = Callable[
[Any, Tuple[Any, ...], Tuple[Any, ...]], Optional[TypeCheckerCallable]
]
# Registry of lookup callbacks; starts out empty.
checker_lookup_functions: list[TypeCheckLookupCallback] = []
# Runtime types of subscripted/unsubscripted typing generics. check_class() accepts
# instances of these in addition to real classes.
generic_alias_types: tuple[type, ...] = (type(List), type(List[Any]))
if sys.version_info >= (3, 9):
generic_alias_types += (types.GenericAlias,)
# Unique sentinel used as a dict.get() default so that a stored None can be told
# apart from an absent key (see check_typed_dict()).
_missing = object()
# Lifted from mypy.sharedparse
# Names of the binary-operator special methods: arithmetic, bitwise and comparison
# operators, together with their in-place ("__i*__") and reflected ("__r*__")
# variants.
# NOTE(review): no use of this set is visible in this part of the file.
BINARY_MAGIC_METHODS = {
"__add__",
"__and__",
"__cmp__",
"__divmod__",
"__div__",
"__eq__",
"__floordiv__",
"__ge__",
"__gt__",
"__iadd__",
"__iand__",
"__idiv__",
"__ifloordiv__",
"__ilshift__",
"__imatmul__",
"__imod__",
"__imul__",
"__ior__",
"__ipow__",
"__irshift__",
"__isub__",
"__itruediv__",
"__ixor__",
"__le__",
"__lshift__",
"__lt__",
"__matmul__",
"__mod__",
"__mul__",
"__ne__",
"__or__",
"__pow__",
"__radd__",
"__rand__",
"__rdiv__",
"__rfloordiv__",
"__rlshift__",
"__rmatmul__",
"__rmod__",
"__rmul__",
"__ror__",
"__rpow__",
"__rrshift__",
"__rshift__",
"__rsub__",
"__rtruediv__",
"__rxor__",
"__sub__",
"__truediv__",
"__xor__",
}
def check_callable(
value: Any,
origin_type: Any,
args: tuple[Any, ...],
memo: TypeCheckMemo,
) -> None:
if not callable(value):
raise TypeCheckError("is not callable")
if args:
try:
signature = inspect.signature(value)
except (TypeError, ValueError):
return
argument_types = args[0]
if isinstance(argument_types, list) and not any(
type(item) is ParamSpec for item in argument_types
):
# The callable must not have keyword-only arguments without defaults
unfulfilled_kwonlyargs = [
param.name
for param in signature.parameters.values()
if param.kind == Parameter.KEYWORD_ONLY
and param.default == Parameter.empty
]
if unfulfilled_kwonlyargs:
raise TypeCheckError(
f"has mandatory keyword-only arguments in its declaration: "
f'{", ".join(unfulfilled_kwonlyargs)}'
)
num_positional_args = num_mandatory_pos_args = 0
has_varargs = False
for param in signature.parameters.values():
if param.kind in (
Parameter.POSITIONAL_ONLY,
Parameter.POSITIONAL_OR_KEYWORD,
):
num_positional_args += 1
if param.default is Parameter.empty:
num_mandatory_pos_args += 1
elif param.kind == Parameter.VAR_POSITIONAL:
has_varargs = True
if num_mandatory_pos_args > len(argument_types):
raise TypeCheckError(
f"has too many mandatory positional arguments in its declaration; "
f"expected {len(argument_types)} but {num_mandatory_pos_args} "
f"mandatory positional argument(s) declared"
)
elif not has_varargs and num_positional_args < len(argument_types):
raise TypeCheckError(
f"has too few arguments in its declaration; expected "
f"{len(argument_types)} but {num_positional_args} argument(s) "
f"declared"
)
def check_mapping(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against a mapping annotation and, if key/value types are given,
    check the sampled items against them.

    :param value: the object to check
    :param origin_type: the mapping type of the annotation; ``Dict``/``dict`` and
        ``MutableMapping`` get stricter container checks than other mappings
    :param args: empty, or a ``(key_type, value_type)`` pair
    :param memo: provides the collection check strategy used to pick the items
    :raises TypeCheckError: if the container or any sampled key/value mismatches
    """
    wants_dict = origin_type is Dict or origin_type is dict
    if wants_dict and not isinstance(value, dict):
        raise TypeCheckError("is not a dict")

    wants_mutable = (
        origin_type is MutableMapping or origin_type is collections.abc.MutableMapping
    )
    if wants_mutable:
        if not isinstance(value, collections.abc.MutableMapping):
            raise TypeCheckError("is not a mutable mapping")
    elif not isinstance(value, collections.abc.Mapping):
        raise TypeCheckError("is not a mapping")

    if not args:
        return

    key_type, value_type = args
    if key_type is Any and value_type is Any:
        # Nothing to verify about the contents
        return

    strategy = memo.config.collection_check_strategy
    for k, v in strategy.iterate_samples(value.items()):
        try:
            check_type_internal(k, key_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"key {k!r}")
            raise

        try:
            check_type_internal(v, value_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"value of key {k!r}")
            raise
# Check ``value`` against a TypedDict class (``origin_type``): it must be a dict,
# must not contain undeclared keys, must contain every required key, and the value
# of each declared key that is present must pass check_type_internal() for that
# key's annotation. Raises TypeCheckError at the first violation. ``args`` is unused.
def check_typed_dict(
value: Any,
origin_type: Any,
args: tuple[Any, ...],
memo: TypeCheckMemo,
) -> None:
if not isinstance(value, dict):
raise TypeCheckError("is not a dict")
# Keys are taken from the class's raw __annotations__
declared_keys = frozenset(origin_type.__annotations__)
if hasattr(origin_type, "__required_keys__"):
required_keys = set(origin_type.__required_keys__)
else: # py3.8 and lower
# Without __required_keys__, fall back to __total__: all keys or none
required_keys = set(declared_keys) if origin_type.__total__ else set()
existing_keys = set(value)
extra_keys = existing_keys - declared_keys
if extra_keys:
# Sorted by repr so the message is deterministic even for mixed key types
keys_formatted = ", ".join(f'"{key}"' for key in sorted(extra_keys, key=repr))
raise TypeCheckError(f"has unexpected extra key(s): {keys_formatted}")
# Detect NotRequired fields which are hidden by get_type_hints()
type_hints: dict[str, type] = {}
for key, annotation in origin_type.__annotations__.items():
if isinstance(annotation, ForwardRef):
annotation = evaluate_forwardref(annotation, memo)
if get_origin(annotation) is NotRequired:
# A NotRequired[...] key is optional; check against the wrapped type instead
required_keys.discard(key)
annotation = get_args(annotation)[0]
type_hints[key] = annotation
missing_keys = required_keys - existing_keys
if missing_keys:
keys_formatted = ", ".join(f'"{key}"' for key in sorted(missing_keys, key=repr))
raise TypeCheckError(f"is missing required key(s): {keys_formatted}")
for key, argtype in type_hints.items():
# The _missing sentinel distinguishes an absent key from a stored None
argvalue = value.get(key, _missing)
if argvalue is not _missing:
try:
check_type_internal(argvalue, argtype, memo)
except TypeCheckError as exc:
exc.append_path_element(f"value of key {key!r}")
raise
def check_list(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is a ``list`` and that its sampled items match the item type.

    :param value: the object to check
    :param origin_type: not used by this checker
    :param args: empty, or a one-element tuple holding the item type
    :param memo: provides the collection check strategy used to pick the items
    :raises TypeCheckError: if ``value`` is not a list or a sampled item mismatches
    """
    if not isinstance(value, list):
        raise TypeCheckError("is not a list")

    if not args or args == (Any,):
        return

    item_type = args[0]
    sampled = memo.config.collection_check_strategy.iterate_samples(value)
    for position, item in enumerate(sampled):
        try:
            check_type_internal(item, item_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"item {position}")
            raise
def check_sequence(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is a ``collections.abc.Sequence`` and that its sampled items
    match the item type.

    :param value: the object to check
    :param origin_type: not used by this checker
    :param args: empty, or a one-element tuple holding the item type
    :param memo: provides the collection check strategy used to pick the items
    :raises TypeCheckError: if ``value`` is not a sequence or a sampled item
        mismatches
    """
    if not isinstance(value, collections.abc.Sequence):
        raise TypeCheckError("is not a sequence")

    if not args or args == (Any,):
        return

    item_type = args[0]
    sampled = memo.config.collection_check_strategy.iterate_samples(value)
    for position, item in enumerate(sampled):
        try:
            check_type_internal(item, item_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"item {position}")
            raise
def check_set(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is a set (a ``frozenset`` when the annotation asks for one)
    and that its sampled members match the member type.

    :param value: the object to check
    :param origin_type: ``frozenset`` demands a frozenset; anything else accepts any
        ``AbstractSet``
    :param args: empty, or a one-element tuple holding the member type
    :param memo: provides the collection check strategy used to pick the members
    :raises TypeCheckError: if the container or a sampled member mismatches
    """
    must_be_frozen = origin_type is frozenset
    if must_be_frozen and not isinstance(value, frozenset):
        raise TypeCheckError("is not a frozenset")

    if not must_be_frozen and not isinstance(value, AbstractSet):
        raise TypeCheckError("is not a set")

    if not args or args == (Any,):
        return

    member_type = args[0]
    for member in memo.config.collection_check_strategy.iterate_samples(value):
        try:
            check_type_internal(member, member_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"[{member}]")
            raise
def check_tuple(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against a tuple or named tuple annotation.

    Three parametrized forms are handled: a homogeneous ``(T, ...)`` tuple, the empty
    tuple ``((),)``, and a fixed-length tuple with one type per position.

    :param value: the object to check
    :param origin_type: the annotated type; when it has non-empty ``__annotations__``
        it is treated as a named tuple class and checked field by field
    :param args: the annotation's type arguments (may be empty)
    :param memo: provides the collection check strategy for the homogeneous form
    :raises TypeCheckError: if the container, its length or any element mismatches
    """
    # Specialized check for NamedTuples
    field_types = getattr(origin_type, "__annotations__", None)
    if field_types:
        if not isinstance(value, origin_type):
            raise TypeCheckError(
                f"is not a named tuple of type {qualified_name(origin_type)}"
            )

        for name, field_type in field_types.items():
            try:
                check_type_internal(getattr(value, name), field_type, memo)
            except TypeCheckError as exc:
                exc.append_path_element(f"attribute {name!r}")
                raise

        return

    if not isinstance(value, tuple):
        raise TypeCheckError("is not a tuple")

    if not args:
        # Unparametrized Tuple or plain tuple
        return

    use_ellipsis = args[-1] is Ellipsis
    tuple_params = args[:-1] if use_ellipsis else args[:]

    if use_ellipsis:
        element_type = tuple_params[0]
        sampled = memo.config.collection_check_strategy.iterate_samples(value)
        for position, element in enumerate(sampled):
            try:
                check_type_internal(element, element_type, memo)
            except TypeCheckError as exc:
                exc.append_path_element(f"item {position}")
                raise

        return

    if tuple_params == ((),):
        if value != ():
            raise TypeCheckError("is not an empty tuple")

        return

    if len(value) != len(tuple_params):
        raise TypeCheckError(
            f"has wrong number of elements (expected {len(tuple_params)}, got "
            f"{len(value)} instead)"
        )

    for position, (element, element_type) in enumerate(zip(value, tuple_params)):
        try:
            check_type_internal(element, element_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"item {position}")
            raise
def check_union(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against each member of a union and accept the first match.

    :param value: the object to check
    :param origin_type: not used by this checker
    :param args: the member types of the union
    :param memo: passed through to the per-member checks
    :raises TypeCheckError: if no member matches; the message lists every member's
        individual failure
    """
    failures: dict[str, TypeCheckError] = {}
    try:
        for candidate in args:
            try:
                check_type_internal(value, candidate, memo)
            except TypeCheckError as exc:
                failures[get_type_name(candidate)] = exc
            else:
                return

        report_lines = [f"{key}: {error}" for key, error in failures.items()]
        formatted_errors = indent("\n".join(report_lines), " ")
    finally:
        del failures  # avoid creating ref cycle

    raise TypeCheckError(f"did not match any element in the union:\n{formatted_errors}")
def check_uniontype(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against each member of a union and accept the first match.

    :param value: the object to check
    :param origin_type: not used by this checker
    :param args: the member types of the union
    :param memo: passed through to the per-member checks
    :raises TypeCheckError: if no member matches; the message lists every member's
        individual failure
    """
    errors: dict[str, TypeCheckError] = {}
    try:
        for type_ in args:
            try:
                check_type_internal(value, type_, memo)
                return
            except TypeCheckError as exc:
                errors[get_type_name(type_)] = exc

        formatted_errors = indent(
            "\n".join(f"{key}: {error}" for key, error in errors.items()), " "
        )
    finally:
        # The stored exceptions carry tracebacks that pass through this frame, and
        # this frame's locals hold the dict: drop it to avoid creating a ref cycle
        # (same precaution as in check_union()).
        del errors

    raise TypeCheckError(f"did not match any element in the union:\n{formatted_errors}")
def check_class(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is a class and, if the annotation names one, that it is
    acceptable as that class.

    :param value: the object to check; classes and generic aliases pass the initial
        "is a class" test
    :param origin_type: not used by this checker
    :param args: empty, or a one-element tuple holding the expected class, which may
        also be a forward reference, ``Any``, a protocol, a ``TypeVar`` or a ``Union``
    :param memo: used to resolve forward references and passed to nested checks
    :raises TypeCheckError: if ``value`` is not a class or does not satisfy the
        expected class
    """
    if not (isclass(value) or isinstance(value, generic_alias_types)):
        raise TypeCheckError("is not a class")

    if not args:
        return

    target = args[0]
    expected_class = (
        evaluate_forwardref(target, memo) if isinstance(target, ForwardRef) else target
    )

    if expected_class is Any:
        return

    if getattr(expected_class, "_is_protocol", False):
        check_protocol(value, expected_class, (), memo)
        return

    if isinstance(expected_class, TypeVar):
        check_typevar(value, expected_class, (), memo, subclass_check=True)
        return

    if get_origin(expected_class) is Union:
        # Accept as soon as one member of the union accepts the class
        failures: dict[str, TypeCheckError] = {}
        for member in get_args(expected_class):
            if member is Any:
                return

            try:
                check_class(value, type, (member,), memo)
            except TypeCheckError as exc:
                failures[get_type_name(member)] = exc
            else:
                return

        formatted_errors = indent(
            "\n".join(f"{key}: {error}" for key, error in failures.items()), " "
        )
        raise TypeCheckError(
            f"did not match any element in the union:\n{formatted_errors}"
        )

    if not issubclass(value, expected_class):  # type: ignore[arg-type]
        raise TypeCheckError(f"is not a subclass of {qualified_name(expected_class)}")
def check_newtype(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against the type stored in ``origin_type.__supertype__``.

    :param value: the object to check
    :param origin_type: an object exposing ``__supertype__``
    :param args: not used by this checker
    :param memo: passed through to the nested check
    """
    supertype = origin_type.__supertype__
    check_type_internal(value, supertype, memo)
def check_instance(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is an instance of ``origin_type``.

    :param value: the object to check
    :param origin_type: the class (or tuple of classes) handed to ``isinstance()``
    :param args: not used by this checker
    :param memo: not used by this checker
    :raises TypeCheckError: if the ``isinstance()`` test fails
    """
    if isinstance(value, origin_type):
        return

    raise TypeCheckError(f"is not an instance of {qualified_name(origin_type)}")
def check_typevar(
    value: Any,
    origin_type: TypeVar,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
    *,
    subclass_check: bool = False,
) -> None:
    """
    Check ``value`` against the bound or the constraints of a type variable.

    A bound takes precedence over constraints; a type variable with neither accepts
    anything.

    :param value: the object to check
    :param origin_type: the type variable
    :param args: not used by this checker
    :param memo: passed through to the nested checks
    :param subclass_check: if ``True``, check against ``Type[...]`` of the bound or
        constraint instead of the bound or constraint itself
    :raises TypeCheckError: if the bound is violated or no constraint matches
    """
    bound = origin_type.__bound__
    if bound is not None:
        annotation = Type[bound] if subclass_check else bound
        check_type_internal(value, annotation, memo)
        return

    constraints = origin_type.__constraints__
    if not constraints:
        return

    for constraint in constraints:
        annotation = Type[constraint] if subclass_check else constraint
        try:
            check_type_internal(value, annotation, memo)
        except TypeCheckError:
            continue

        # First matching constraint settles it
        return

    formatted_constraints = ", ".join(
        get_type_name(constraint) for constraint in constraints
    )
    raise TypeCheckError(
        f"does not match any of the constraints " f"({formatted_constraints})"
    )
def _is_literal_type(typ: object) -> bool:
    """Return ``True`` if ``typ`` is ``Literal`` from ``typing`` or ``typing_extensions``."""
    if typ is typing.Literal:
        return True

    return typ is typing_extensions.Literal
def check_literal(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is one of the values of a ``Literal[...]`` annotation.

    A literal matches only when it is equal to ``value`` *and* has exactly the same
    type, so ``True`` does not match ``1`` and vice versa. Every literal is
    considered, which lets ``Literal[0, False]`` accept both ``0`` and ``False``.

    :param value: the object to check
    :param origin_type: not used by this checker
    :param args: the literal values; nested ``Literal[...]`` entries are flattened
    :param memo: not used by this checker
    :raises TypeError: if the annotation contains a value that is not ``None``, an
        ``int``, ``str``, ``bytes``, ``bool`` or ``Enum`` member
    :raises TypeCheckError: if ``value`` matches none of the literals
    """

    def get_literal_args(literal_args: tuple[Any, ...]) -> tuple[Any, ...]:
        # Flatten nested Literal[...] entries and validate the plain values
        retval: list[Any] = []
        for arg in literal_args:
            if _is_literal_type(get_origin(arg)):
                retval.extend(get_literal_args(arg.__args__))
            elif arg is None or isinstance(arg, (int, str, bytes, bool, Enum)):
                retval.append(arg)
            else:
                raise TypeError(
                    f"Illegal literal value: {arg}"
                )  # TypeError here is deliberate

        return tuple(retval)

    final_args = get_literal_args(args)

    # Compare against every literal rather than stopping at the first "==" match:
    # 0 == False and 1 == True, so the first equal element may have the wrong type
    # while a later one is an exact match.
    value_type = type(value)
    for candidate in final_args:
        if type(candidate) is value_type and candidate == value:
            return

    formatted_args = ", ".join(repr(arg) for arg in final_args)
    raise TypeCheckError(f"is not any of ({formatted_args})") from None
def check_literal_string(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` as a plain ``str``.

    :param value: the object to check
    :param origin_type: not used by this checker
    :param args: not used by this checker
    :param memo: passed through to the nested check
    """
    runtime_type = str
    check_type_internal(value, runtime_type, memo)
def check_typeguard(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` as a plain ``bool``.

    :param value: the object to check
    :param origin_type: not used by this checker
    :param args: not used by this checker
    :param memo: passed through to the nested check
    """
    runtime_type = bool
    check_type_internal(value, runtime_type, memo)
def check_none(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is ``None``.

    :param value: the object to check
    :param origin_type: not used by this checker
    :param args: not used by this checker
    :param memo: not used by this checker
    :raises TypeCheckError: if ``value`` is anything other than ``None``
    """
    if value is None:
        return

    raise TypeCheckError("is not None")
def check_number(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against ``complex`` or ``float``, also accepting the narrower
    numeric types (``float``/``int`` for ``complex``, ``int`` for ``float``).

    Any other ``origin_type`` is accepted without a check.

    :param value: the object to check
    :param origin_type: ``complex`` or ``float``
    :param args: not used by this checker
    :param memo: not used by this checker
    :raises TypeCheckError: if ``value`` is not of an accepted numeric type
    """
    if origin_type is complex:
        if not isinstance(value, (complex, float, int)):
            raise TypeCheckError("is neither complex, float or int")
    elif origin_type is float:
        if not isinstance(value, (float, int)):
            raise TypeCheckError("is neither float or int")
def check_io(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is an I/O object of the flavour named by the annotation.

    ``TextIO`` and ``IO[str]`` require a text stream, ``BinaryIO`` and ``IO[bytes]``
    a raw or buffered binary stream, and anything else any ``IOBase``.

    :param value: the object to check
    :param origin_type: ``TextIO``, ``BinaryIO``, ``IO`` or another I/O annotation
    :param args: the annotation's type arguments, e.g. ``(str,)`` for ``IO[str]``
    :param memo: not used by this checker
    :raises TypeCheckError: if ``value`` is not an instance of the required base
    """
    is_generic_io = origin_type is IO
    if origin_type is TextIO or (is_generic_io and args == (str,)):
        required_base: Any = TextIOBase
        complaint = "is not a text based I/O object"
    elif origin_type is BinaryIO or (is_generic_io and args == (bytes,)):
        required_base = (RawIOBase, BufferedIOBase)
        complaint = "is not a binary I/O object"
    else:
        required_base = IOBase
        complaint = "is not an I/O object"

    if not isinstance(value, required_base):
        raise TypeCheckError(complaint)
# Compare the signature of ``subject_callable`` with the signature of
# ``getattr(protocol, attrname)`` and raise TypeCheckError at the first
# incompatibility found. Only method flavour (instance/class/static), parameter
# kinds, names, counts and the presence of defaults are compared; parameter and
# return annotations are not looked at.
#
# subject_callable: the member found on the object being checked
# protocol: the protocol class that declares ``attrname``
# attrname: name of the member on the protocol
def check_signature_compatible(
subject_callable: Callable[..., Any], protocol: type, attrname: str
) -> None:
subject_sig = inspect.signature(subject_callable)
protocol_sig = inspect.signature(getattr(protocol, attrname))
# Both sides are assumed to be instance methods until shown otherwise
protocol_type: typing.Literal["instance", "class", "static"] = "instance"
subject_type: typing.Literal["instance", "class", "static"] = "instance"
# Check if the protocol-side method is a class method or static method
# (only the protocol's own __dict__ is consulted, so the raw descriptor is seen)
if attrname in protocol.__dict__:
descriptor = protocol.__dict__[attrname]
if isinstance(descriptor, staticmethod):
protocol_type = "static"
elif isinstance(descriptor, classmethod):
protocol_type = "class"
# Check if the subject-side method is a class method or static method
# (a method bound to a class counts as "class"; a callable without __self__
# counts as "static")
if inspect.ismethod(subject_callable) and inspect.isclass(
subject_callable.__self__
):
subject_type = "class"
elif not hasattr(subject_callable, "__self__"):
subject_type = "static"
# Only an instance/non-instance mismatch is an error; class vs. static is tolerated
if protocol_type == "instance" and subject_type != "instance":
raise TypeCheckError(
f"should be an instance method but it's a {subject_type} method"
)
elif protocol_type != "instance" and subject_type == "instance":
raise TypeCheckError(
f"should be a {protocol_type} method but it's an instance method"
)
# True if the protocol method declares *args
expected_varargs = any(
param
for param in protocol_sig.parameters.values()
if param.kind is Parameter.VAR_POSITIONAL
)
# True if the subject declares *args
has_varargs = any(
param
for param in subject_sig.parameters.values()
if param.kind is Parameter.VAR_POSITIONAL
)
if expected_varargs and not has_varargs:
raise TypeCheckError("should accept variable positional arguments but doesn't")
# Same pair of flags for **kwargs
protocol_has_varkwargs = any(
param
for param in protocol_sig.parameters.values()
if param.kind is Parameter.VAR_KEYWORD
)
subject_has_varkwargs = any(
param
for param in subject_sig.parameters.values()
if param.kind is Parameter.VAR_KEYWORD
)
if protocol_has_varkwargs and not subject_has_varkwargs:
raise TypeCheckError("should accept variable keyword arguments but doesn't")
# Check that the callable has at least the expect amount of positional-only
# arguments (and no extra positional-only arguments without default values)
if not has_varargs:
protocol_args = [
param
for param in protocol_sig.parameters.values()
if param.kind
in (Parameter.POSITIONAL_ONLY, Parameter.POSITIONAL_OR_KEYWORD)
]
subject_args = [
param
for param in subject_sig.parameters.values()
if param.kind
in (Parameter.POSITIONAL_ONLY, Parameter.POSITIONAL_OR_KEYWORD)
]
# Remove the "self" parameter from the protocol arguments to match
# NOTE(review): pop(0) assumes the protocol method declares at least one
# positional parameter - confirm this holds for every protocol member
if protocol_type == "instance":
protocol_args.pop(0)
# Walk both parameter lists in parallel; zip_longest pads the shorter with None
for protocol_arg, subject_arg in zip_longest(protocol_args, subject_args):
if protocol_arg is None:
# The subject has more positional parameters than the protocol
if subject_arg.default is Parameter.empty:
raise TypeCheckError("has too many mandatory positional arguments")
break
if subject_arg is None:
raise TypeCheckError("has too few positional arguments")
if (
protocol_arg.kind is Parameter.POSITIONAL_OR_KEYWORD
and subject_arg.kind is Parameter.POSITIONAL_ONLY
):
raise TypeCheckError(
f"has an argument ({subject_arg.name}) that should not be "
f"positional-only"
)
# A parameter that may be passed by keyword must keep the protocol's name
if (
protocol_arg.kind is Parameter.POSITIONAL_OR_KEYWORD
and protocol_arg.name != subject_arg.name
):
raise TypeCheckError(
f"has a positional argument ({subject_arg.name}) that should be "
f"named {protocol_arg.name!r} at this position"
)
# Keyword-only parameters of each side, keyed by name
protocol_kwonlyargs = {
param.name: param
for param in protocol_sig.parameters.values()
if param.kind is Parameter.KEYWORD_ONLY
}
subject_kwonlyargs = {
param.name: param
for param in subject_sig.parameters.values()
if param.kind is Parameter.KEYWORD_ONLY
}
if not subject_has_varkwargs:
# Check that the signature has at least the required keyword-only arguments, and
# no extra mandatory keyword-only arguments
if missing_kwonlyargs := [
param.name
for param in protocol_kwonlyargs.values()
if param.name not in subject_kwonlyargs
]:
raise TypeCheckError(
"is missing keyword-only arguments: " + ", ".join(missing_kwonlyargs)
)
if not protocol_has_varkwargs:
# Keyword-only parameters without a default that the protocol does not declare
if extra_kwonlyargs := [
param.name
for param in subject_kwonlyargs.values()
if param.default is Parameter.empty
and param.name not in protocol_kwonlyargs
]:
raise TypeCheckError(
"has mandatory keyword-only arguments not present in the protocol: "
+ ", ".join(extra_kwonlyargs)
)
def check_protocol(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` provides every member of the protocol ``origin_type``.

    Members with a (non-``None``) resolved type hint are checked as attributes
    against that hint. Other members that are callable on the protocol must be
    present and callable on ``value``, with a compatible signature.

    :param value: the object to check
    :param origin_type: the protocol class
    :param args: not used by this checker
    :param memo: passed through to the attribute type checks
    :raises TypeCheckError: at the first missing or incompatible member
    """

    def incompatible(reason: str) -> TypeCheckError:
        # Every failure message shares the same prefix naming the protocol
        return TypeCheckError(
            f"is not compatible with the {origin_type.__qualname__} "
            f"protocol because {reason}"
        )

    hints = typing.get_type_hints(origin_type)
    member_names = sorted(typing_extensions.get_protocol_members(origin_type))
    for attrname in member_names:
        annotation = hints.get(attrname)
        if annotation is not None:
            try:
                subject_member = getattr(value, attrname)
            except AttributeError:
                raise incompatible(f"it has no attribute named {attrname!r}") from None

            try:
                check_type_internal(subject_member, annotation, memo)
            except TypeCheckError as exc:
                raise incompatible(f"its {attrname!r} attribute {exc}") from None

            continue

        if not callable(getattr(origin_type, attrname)):
            continue

        try:
            subject_member = getattr(value, attrname)
        except AttributeError:
            raise incompatible(f"it has no method named {attrname!r}") from None

        if not callable(subject_member):
            raise incompatible(f"its {attrname!r} attribute is not a callable")

        # TODO: implement assignability checks for parameter and return value
        # annotations
        try:
            check_signature_compatible(subject_member, origin_type, attrname)
        except TypeCheckError as exc:
            raise incompatible(f"its {attrname!r} method {exc}") from None
def check_byteslike(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is a ``bytearray``, ``bytes`` or ``memoryview``.

    :param value: the object to check
    :param origin_type: not used by this checker
    :param args: not used by this checker
    :param memo: not used by this checker
    :raises TypeCheckError: if ``value`` is none of the accepted types
    """
    if isinstance(value, (bytearray, bytes, memoryview)):
        return

    raise TypeCheckError("is not bytes-like")
def check_self(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against ``Self`` using the self type recorded in ``memo``.

    A class passes if it is a subclass of ``memo.self_type``; any other object passes
    if it is an instance of it.

    :raises TypeCheckError: if ``memo.self_type`` is ``None`` or ``value`` does not
        match the self type

    """
    if memo.self_type is None:
        raise TypeCheckError("cannot be checked against Self outside of a method call")

    if isclass(value):
        if issubclass(value, memo.self_type):
            return

        raise TypeCheckError(
            f"is not an instance of the self type "
            f"({qualified_name(memo.self_type)})"
        )

    if not isinstance(value, memo.self_type):
        raise TypeCheckError(
            f"is not an instance of the self type ({qualified_name(memo.self_type)})"
        )
def check_paramspec(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """Placeholder checker for ``ParamSpec`` annotations; currently accepts everything."""
    return None  # No-op for now
def check_instanceof(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Plain :func:`isinstance` check of ``value`` against ``origin_type``.

    :raises TypeCheckError: if ``value`` is not an instance of ``origin_type``

    """
    if isinstance(value, origin_type):
        return

    raise TypeCheckError(f"is not an instance of {qualified_name(origin_type)}")
def check_type_internal(
    value: Any,
    annotation: Any,
    memo: TypeCheckMemo,
) -> None:
    """
    Check that the given object is compatible with the given type annotation.

    This function should only be used by type checker callables. Applications should use
    :func:`~.check_type` instead.

    The annotation is first normalized (forward references resolved, ``Annotated``
    unwrapped, origin and arguments extracted). The registered lookup functions in
    ``checker_lookup_functions`` are then asked, in order, for a checker; the first
    one returned is called and ends the check. If none is found, the fallback is an
    :func:`isinstance` check for classes, or a warning for leftover string annotations.

    :param value: the value to check
    :param annotation: the type annotation to check against
    :param memo: a memo object containing configuration and information necessary for
        looking up forward references
    :raises TypeCheckError: if the value does not match the annotation
    :raises NameError: if a forward reference cannot be resolved and the forward
        reference policy is ``ERROR``

    """
    if isinstance(annotation, ForwardRef):
        try:
            annotation = evaluate_forwardref(annotation, memo)
        except NameError:
            # Unresolvable forward reference: re-raise, warn or stay silent depending
            # on the configured policy. For WARN and IGNORE the check is skipped.
            if memo.config.forward_ref_policy is ForwardRefPolicy.ERROR:
                raise
            elif memo.config.forward_ref_policy is ForwardRefPolicy.WARN:
                warnings.warn(
                    f"Cannot resolve forward reference {annotation.__forward_arg__!r}",
                    TypeHintWarning,
                    stacklevel=get_stacklevel(),
                )

            return

    # Any accepts everything, and mock objects are never type checked
    if annotation is Any or annotation is SubclassableAny or isinstance(value, Mock):
        return

    # Skip type checks if value is an instance of a class that inherits from Any
    if not isclass(value) and SubclassableAny in type(value).__bases__:
        return

    extras: tuple[Any, ...]
    origin_type = get_origin(annotation)
    if origin_type is Annotated:
        # Unwrap Annotated[X, *extras]: check against X, hand the extras to lookups
        annotation, *extras_ = get_args(annotation)
        extras = tuple(extras_)
        origin_type = get_origin(annotation)
    else:
        extras = ()

    if origin_type is not None:
        args = get_args(annotation)

        # Compatibility hack to distinguish between unparametrized and empty tuple
        # (tuple[()]), necessary due to https://github.com/python/cpython/issues/91137
        if origin_type in (tuple, Tuple) and annotation is not Tuple and not args:
            args = ((),)
    else:
        # Not a parametrized generic: the annotation itself serves as the origin
        origin_type = annotation
        args = ()

    # The first lookup function that returns a checker wins
    for lookup_func in checker_lookup_functions:
        checker = lookup_func(origin_type, args, extras)
        if checker:
            checker(value, origin_type, args, memo)
            return

    # No dedicated checker was found
    if isclass(origin_type):
        if not isinstance(value, origin_type):
            raise TypeCheckError(f"is not an instance of {qualified_name(origin_type)}")
    elif type(origin_type) is str:  # noqa: E721
        warnings.warn(
            f"Skipping type check against {origin_type!r}; this looks like a "
            f"string-form forward reference imported from another module",
            TypeHintWarning,
            stacklevel=get_stacklevel(),
        )
# Registry mapping an annotation's origin type to the checker callable that handles
# it. ``builtin_checker_lookup`` consults it with ``dict.get``, so:
# Equality checks are applied to these
origin_type_checkers = {
    bytes: check_byteslike,
    AbstractSet: check_set,
    BinaryIO: check_io,
    Callable: check_callable,
    collections.abc.Callable: check_callable,
    complex: check_number,
    dict: check_mapping,
    Dict: check_mapping,
    float: check_number,
    frozenset: check_set,
    IO: check_io,
    list: check_list,
    List: check_list,
    typing.Literal: check_literal,
    Mapping: check_mapping,
    MutableMapping: check_mapping,
    None: check_none,
    collections.abc.Mapping: check_mapping,
    collections.abc.MutableMapping: check_mapping,
    Sequence: check_sequence,
    collections.abc.Sequence: check_sequence,
    collections.abc.Set: check_set,
    set: check_set,
    Set: check_set,
    TextIO: check_io,
    tuple: check_tuple,
    Tuple: check_tuple,
    type: check_class,
    Type: check_class,
    Union: check_union,
    # On some versions of Python, these may simply be re-exports from "typing",
    # but exactly which Python versions is subject to change.
    # It's best to err on the safe side and just always specify these.
    typing_extensions.Literal: check_literal,
    typing_extensions.LiteralString: check_literal_string,
    typing_extensions.Self: check_self,
    typing_extensions.TypeGuard: check_typeguard,
}
# The names below are only registered on the Python versions guarded for here
if sys.version_info >= (3, 10):
    origin_type_checkers[types.UnionType] = check_uniontype
    origin_type_checkers[typing.TypeGuard] = check_typeguard
if sys.version_info >= (3, 11):
    origin_type_checkers.update(
        {typing.LiteralString: check_literal_string, typing.Self: check_self}
    )
def builtin_checker_lookup(
    origin_type: Any, args: tuple[Any, ...], extras: tuple[Any, ...]
) -> TypeCheckerCallable | None:
    """
    Find the built-in checker callable for the given origin type.

    The ``origin_type_checkers`` registry is tried first. If it has no entry, a series
    of structural tests (typed dict, named tuple, protocol, ``ParamSpec``, ``TypeVar``,
    ``NewType``) is applied in order.

    :param origin_type: the origin type of the annotation being checked
    :param args: type arguments of the annotation (not used for the lookup)
    :param extras: extra ``Annotated`` arguments (not used for the lookup)
    :return: the matching checker, or ``None`` if there is no built-in checker

    """
    checker = origin_type_checkers.get(origin_type)
    if checker is not None:
        return checker

    if is_typeddict(origin_type):
        return check_typed_dict

    if isclass(origin_type) and issubclass(
        origin_type,
        Tuple,  # type: ignore[arg-type]
    ):
        # NamedTuple
        return check_tuple

    if getattr(origin_type, "_is_protocol", False):
        return check_protocol

    if isinstance(origin_type, ParamSpec):
        return check_paramspec

    if isinstance(origin_type, TypeVar):
        return check_typevar

    if origin_type.__class__ is NewType:
        # typing.NewType on Python 3.10+
        return check_newtype

    looks_like_legacy_newtype = (
        isfunction(origin_type)
        and getattr(origin_type, "__module__", None) == "typing"
        and getattr(origin_type, "__qualname__", "").startswith("NewType.")
        and hasattr(origin_type, "__supertype__")
    )
    if looks_like_legacy_newtype:
        # typing.NewType on Python 3.9 and below
        return check_newtype

    return None


# Register the built-in lookup function
checker_lookup_functions.append(builtin_checker_lookup)
def load_plugins() -> None:
    """
    Load all type checker lookup functions from entry points.

    Every entry point in the ``typeguard.checker_lookup`` group is loaded, and each
    callable obtained that way is inserted at the front of
    :data:`typeguard.checker_lookup_functions`. Entry points that fail to load, or
    that yield a non-callable object, only trigger a warning.

    .. note:: This function is called implicitly on import, unless the
        ``TYPEGUARD_DISABLE_PLUGIN_AUTOLOAD`` environment variable is present.
    """
    for ep in entry_points(group="typeguard.checker_lookup"):
        try:
            plugin = ep.load()
        except Exception as exc:
            warnings.warn(
                f"Failed to load plugin {ep.name!r}: " f"{qualified_name(exc)}: {exc}",
                stacklevel=2,
            )
            continue

        if callable(plugin):
            checker_lookup_functions.insert(0, plugin)
        else:
            warnings.warn(
                f"Plugin {ep} returned a non-callable object: {plugin!r}", stacklevel=2
            )
================================================
FILE: metaflow/_vendor/typeguard/_config.py
================================================
from __future__ import annotations
from collections.abc import Iterable
from dataclasses import dataclass
from enum import Enum, auto
from typing import TYPE_CHECKING, TypeVar
if TYPE_CHECKING:
from ._functions import TypeCheckFailCallback
T = TypeVar("T")
class ForwardRefPolicy(Enum):
    """
    Strategy for dealing with forward references that cannot be resolved.

    Members:

    * ``ERROR``: propagate the :exc:`NameError` when the forward reference lookup fails
    * ``WARN``: emit a :class:`~.TypeHintWarning` if the forward reference lookup fails
    * ``IGNORE``: silently skip checks for unresolvable forward references
    """

    #: let the NameError from the failed lookup propagate
    ERROR = auto()
    #: emit a warning and skip the check
    WARN = auto()
    #: skip the check without any notification
    IGNORE = auto()
class CollectionCheckStrategy(Enum):
    """
    Controls how many items of a collection get type checked.

    This has an effect on the following built-in checkers:

    * ``AbstractSet``
    * ``Dict``
    * ``List``
    * ``Mapping``
    * ``Set``
    * ``Tuple[<type>, ...]`` (arbitrarily sized tuples)

    Members:

    * ``FIRST_ITEM``: check only the first item
    * ``ALL_ITEMS``: check all items
    """

    FIRST_ITEM = auto()
    ALL_ITEMS = auto()

    def iterate_samples(self, collection: Iterable[T]) -> Iterable[T]:
        """
        Return the items of ``collection`` that should be checked.

        :param collection: the collection to take samples from
        :return: for ``FIRST_ITEM``, a one-element list holding the first item (or an
            empty tuple if there are no items); otherwise ``collection`` itself

        """
        if self is not CollectionCheckStrategy.FIRST_ITEM:
            return collection

        # Leave the loop as soon as the first item has been seen
        for first_item in collection:
            return [first_item]

        return ()
@dataclass
class TypeCheckConfiguration:
    """
    You can change Typeguard's behavior with these settings.

    .. attribute:: typecheck_fail_callback
       :type: Callable[[TypeCheckError, TypeCheckMemo], Any]

         Callable that is called when type checking fails.

         Default: ``None`` (the :exc:`~.TypeCheckError` is raised directly)

    .. attribute:: forward_ref_policy
       :type: ForwardRefPolicy

         Specifies what to do when a forward reference fails to resolve.

         Default: ``WARN``

    .. attribute:: collection_check_strategy
       :type: CollectionCheckStrategy

         Specifies how thoroughly the contents of collections (list, dict, etc.) are
         type checked.

         Default: ``FIRST_ITEM``

    .. attribute:: debug_instrumentation
       :type: bool

         If set to ``True``, the code of modules or functions instrumented by typeguard
         is printed to ``sys.stderr`` after the instrumentation is done.

         Requires Python 3.9 or newer.

         Default: ``False``
    """

    forward_ref_policy: ForwardRefPolicy = ForwardRefPolicy.WARN
    typecheck_fail_callback: TypeCheckFailCallback | None = None
    collection_check_strategy: CollectionCheckStrategy = (
        CollectionCheckStrategy.FIRST_ITEM
    )
    debug_instrumentation: bool = False


# Module-level configuration instance, created with the default field values
global_config = TypeCheckConfiguration()
================================================
FILE: metaflow/_vendor/typeguard/_decorators.py
================================================
from __future__ import annotations
import ast
import inspect
import sys
from collections.abc import Sequence
from functools import partial
from inspect import isclass, isfunction
from types import CodeType, FrameType, FunctionType
from typing import TYPE_CHECKING, Any, Callable, ForwardRef, TypeVar, cast, overload
from warnings import warn
from ._config import CollectionCheckStrategy, ForwardRefPolicy, global_config
from ._exceptions import InstrumentationWarning
from ._functions import TypeCheckFailCallback
from ._transformer import TypeguardTransformer
from ._utils import Unset, function_name, get_stacklevel, is_method_of, unset
# Type variable used to express that decorators return the same kind of object they
# were given
T_CallableOrType = TypeVar("T_CallableOrType", bound=Callable[..., Any])

if TYPE_CHECKING:
    # This branch is only seen by static type checkers
    from typeshed.stdlib.types import _Cell

    def typeguard_ignore(f: T_CallableOrType) -> T_CallableOrType:
        """This decorator is a noop during static type-checking."""
        return f

else:
    # At run time, typeguard_ignore is simply an alias of typing.no_type_check
    from typing import no_type_check as typeguard_ignore  # noqa: F401
def make_cell(value: object) -> _Cell:
    """Create a closure cell containing ``value``."""

    def _capture() -> object:
        # Referencing ``value`` here makes it a free variable held in a cell
        return value

    return _capture.__closure__[0]  # type: ignore[index]
def find_target_function(
    new_code: CodeType, target_path: Sequence[str], firstlineno: int
) -> CodeType | None:
    """
    Locate a nested code object by name path and first line number.

    :param new_code: the code object whose constants are searched
    :param target_path: names leading to the target, outermost first
    :param firstlineno: first line number of the wanted code object
    :return: the matching code object, or ``None`` if there is none

    """
    current_name, remaining_path = target_path[0], target_path[1:]
    for candidate in new_code.co_consts:
        if not isinstance(candidate, CodeType) or candidate.co_name != current_name:
            continue

        if candidate.co_firstlineno == firstlineno:
            return candidate

        if remaining_path:
            # Same name but a different line: descend with the rest of the path
            nested_code = find_target_function(candidate, remaining_path, firstlineno)
            if nested_code:
                return nested_code

    return None
def instrument(f: T_CallableOrType) -> FunctionType | str:
    """
    Recompile ``f`` from its module's source with type checking code injected.

    The source of the module containing ``f`` is parsed and transformed with
    :class:`TypeguardTransformer`, the resulting module is compiled, and the code object
    matching ``f`` is extracted from it and wrapped in a new function that copies the
    metadata, defaults and closure of the original.

    :param f: the function to instrument
    :return: the instrumented function, or a string explaining why the function could
        not be instrumented

    """
    if not getattr(f, "__code__", None):
        return "no code associated"
    elif not getattr(f, "__module__", None):
        return "__module__ attribute is not set"
    elif f.__code__.co_filename == "<stdin>":
        # Code typed into the interactive interpreter has no retrievable source
        return "cannot instrument functions defined in a REPL"
    elif hasattr(f, "__wrapped__"):
        return (
            "@typechecked only supports instrumenting functions wrapped with "
            "@classmethod, @staticmethod or @property"
        )

    # The qualified name of a nested function contains "<locals>" markers which have
    # no counterpart in the AST, so they must be dropped from the lookup path
    target_path = [item for item in f.__qualname__.split(".") if item != "<locals>"]
    module_source = inspect.getsource(sys.modules[f.__module__])
    module_ast = ast.parse(module_source)
    instrumentor = TypeguardTransformer(target_path, f.__code__.co_firstlineno)
    instrumentor.visit(module_ast)

    if not instrumentor.target_node or instrumentor.target_lineno is None:
        return "instrumentor did not find the target function"

    module_code = compile(module_ast, f.__code__.co_filename, "exec", dont_inherit=True)
    new_code = find_target_function(
        module_code, target_path, instrumentor.target_lineno
    )
    if not new_code:
        return "cannot find the target function in the AST"

    if global_config.debug_instrumentation and sys.version_info >= (3, 9):
        # Unparse the instrumented AST node back to source and print it to stderr
        print(
            f"Source code of {f.__qualname__}() after instrumentation:"
            "\n----------------------------------------------",
            file=sys.stderr,
        )
        print(ast.unparse(instrumentor.target_node), file=sys.stderr)
        print(
            "----------------------------------------------",
            file=sys.stderr,
        )

    closure = f.__closure__
    if new_code.co_freevars != f.__code__.co_freevars:
        # Create a new closure and find values for the new free variables.
        # The locals are read from the frame two levels above this function.
        frame = cast(FrameType, inspect.currentframe())
        frame = cast(FrameType, frame.f_back)
        frame_locals = cast(FrameType, frame.f_back).f_locals
        cells: list[_Cell] = []
        for key in new_code.co_freevars:
            if key in instrumentor.names_used_in_annotations:
                # Find the value and make a new cell from it
                value = frame_locals.get(key) or ForwardRef(key)
                cells.append(make_cell(value))
            else:
                # Reuse the cell from the existing closure
                assert f.__closure__
                cells.append(f.__closure__[f.__code__.co_freevars.index(key)])

        closure = tuple(cells)

    # Build the replacement function and carry over the original's metadata
    new_function = FunctionType(new_code, f.__globals__, f.__name__, closure=closure)
    new_function.__module__ = f.__module__
    new_function.__name__ = f.__name__
    new_function.__qualname__ = f.__qualname__
    new_function.__annotations__ = f.__annotations__
    new_function.__doc__ = f.__doc__
    new_function.__defaults__ = f.__defaults__
    new_function.__kwdefaults__ = f.__kwdefaults__
    return new_function
@overload
def typechecked(
    *,
    forward_ref_policy: ForwardRefPolicy | Unset = unset,
    typecheck_fail_callback: TypeCheckFailCallback | Unset = unset,
    collection_check_strategy: CollectionCheckStrategy | Unset = unset,
    debug_instrumentation: bool | Unset = unset,
) -> Callable[[T_CallableOrType], T_CallableOrType]: ...


@overload
def typechecked(target: T_CallableOrType) -> T_CallableOrType: ...


def typechecked(
    target: T_CallableOrType | None = None,
    *,
    forward_ref_policy: ForwardRefPolicy | Unset = unset,
    typecheck_fail_callback: TypeCheckFailCallback | Unset = unset,
    collection_check_strategy: CollectionCheckStrategy | Unset = unset,
    debug_instrumentation: bool | Unset = unset,
) -> Any:
    """
    Instrument the target function to perform run-time type checking.

    This decorator recompiles the target function, injecting code to type check
    arguments, return values, yield values (excluding ``yield from``) and assignments to
    annotated local variables.

    This can also be used as a class decorator. This will instrument all type annotated
    methods, including :func:`@classmethod <classmethod>`,
    :func:`@staticmethod <staticmethod>`, and :class:`@property <property>` decorated
    methods in the class.

    .. note:: When Python is run in optimized mode (``-O`` or ``-OO``), this decorator
        is a no-op. This is a feature meant for selectively introducing type checking
        into a code base where the checks aren't meant to be run in production.

    :param target: the function or class to enable type checking for
    :param forward_ref_policy: override for
        :attr:`.TypeCheckConfiguration.forward_ref_policy`
    :param typecheck_fail_callback: override for
        :attr:`.TypeCheckConfiguration.typecheck_fail_callback`
    :param collection_check_strategy: override for
        :attr:`.TypeCheckConfiguration.collection_check_strategy`
    :param debug_instrumentation: override for
        :attr:`.TypeCheckConfiguration.debug_instrumentation`
    :return: the instrumented function or class; the unmodified target if it could
        not be instrumented or Python runs in optimized mode; or, when called without
        a target, a decorator with the given options bound

    """
    if target is None:
        # Called with options only: return a decorator that remembers them
        return partial(
            typechecked,
            forward_ref_policy=forward_ref_policy,
            typecheck_fail_callback=typecheck_fail_callback,
            collection_check_strategy=collection_check_strategy,
            debug_instrumentation=debug_instrumentation,
        )

    if not __debug__:
        return target

    if isclass(target):
        # Instrument the methods of the class in place; members that cannot be
        # instrumented (instrument() returned a message) are left untouched
        for key, attr in target.__dict__.items():
            if is_method_of(attr, target):
                retval = instrument(attr)
                if isfunction(retval):
                    setattr(target, key, retval)
            elif isinstance(attr, (classmethod, staticmethod)):
                if is_method_of(attr.__func__, target):
                    retval = instrument(attr.__func__)
                    if isfunction(retval):
                        # Re-wrap the instrumented function in the same wrapper type
                        wrapper = attr.__class__(retval)
                        setattr(target, key, wrapper)
            elif isinstance(attr, property):
                # Rebuild the property from its (possibly instrumented) accessors
                kwargs: dict[str, Any] = dict(doc=attr.__doc__)
                for name in ("fset", "fget", "fdel"):
                    property_func = kwargs[name] = getattr(attr, name)
                    if is_method_of(property_func, target):
                        retval = instrument(property_func)
                        if isfunction(retval):
                            kwargs[name] = retval

                setattr(target, key, attr.__class__(**kwargs))

        return target

    # Find either the first Python wrapper or the actual function
    wrapper_class: (
        type[classmethod[Any, Any, Any]] | type[staticmethod[Any, Any]] | None
    ) = None
    if isinstance(target, (classmethod, staticmethod)):
        wrapper_class = target.__class__
        target = target.__func__

    retval = instrument(target)
    if isinstance(retval, str):
        # A string return value is the reason why instrumentation was not possible
        warn(
            f"{retval} -- not typechecking {function_name(target)}",
            InstrumentationWarning,
            stacklevel=get_stacklevel(),
        )
        return target

    if wrapper_class is None:
        return retval
    else:
        return wrapper_class(retval)
================================================
FILE: metaflow/_vendor/typeguard/_exceptions.py
================================================
from collections import deque
from typing import Deque
class TypeHintWarning(UserWarning):
    """
    Warning category used when a string-form type hint cannot be resolved to an
    actual type.
    """
class TypeCheckWarning(UserWarning):
    """Warning category for type mismatches detected by typeguard's type checkers."""

    def __init__(self, message: str):
        """Create the warning with ``message`` as its only argument."""
        super().__init__(message)
class InstrumentationWarning(UserWarning):
    """Warning category for problems with instrumenting a function for type checks."""

    def __init__(self, message: str):
        """Create the warning with ``message`` as its only argument."""
        super().__init__(message)
class TypeCheckError(Exception):
    """
    Exception raised by typeguard's type checkers on a type mismatch.

    Path elements describing where the offending value came from can be attached after
    creation; they are rendered in front of the message, joined with ``" of "``.
    """

    def __init__(self, message: str):
        super().__init__(message)
        self._path: Deque[str] = deque()

    def append_path_element(self, element: str) -> None:
        """Add ``element`` to the end of the path shown before the message."""
        self._path.append(element)

    def __str__(self) -> str:
        message = str(self.args[0])
        if not self._path:
            return message

        return " of ".join(self._path) + " " + message
================================================
FILE: metaflow/_vendor/typeguard/_functions.py
================================================
from __future__ import annotations
import sys
import warnings
from typing import Any, Callable, NoReturn, TypeVar, Union, overload
from . import _suppression
from ._checkers import BINARY_MAGIC_METHODS, check_type_internal
from ._config import (
CollectionCheckStrategy,
ForwardRefPolicy,
TypeCheckConfiguration,
)
from ._exceptions import TypeCheckError, TypeCheckWarning
from ._memo import TypeCheckMemo
from ._utils import get_stacklevel, qualified_name
if sys.version_info >= (3, 11):
from typing import Literal, Never, TypeAlias
else:
from metaflow._vendor.typing_extensions import Literal, Never, TypeAlias
T = TypeVar("T")
TypeCheckFailCallback: TypeAlias = Callable[[TypeCheckError, TypeCheckMemo], Any]
@overload
def check_type(
    value: object,
    expected_type: type[T],
    *,
    forward_ref_policy: ForwardRefPolicy = ...,
    typecheck_fail_callback: TypeCheckFailCallback | None = ...,
    collection_check_strategy: CollectionCheckStrategy = ...,
) -> T: ...


@overload
def check_type(
    value: object,
    expected_type: Any,
    *,
    forward_ref_policy: ForwardRefPolicy = ...,
    typecheck_fail_callback: TypeCheckFailCallback | None = ...,
    collection_check_strategy: CollectionCheckStrategy = ...,
) -> Any: ...


def check_type(
    value: object,
    expected_type: Any,
    *,
    forward_ref_policy: ForwardRefPolicy = TypeCheckConfiguration().forward_ref_policy,
    typecheck_fail_callback: TypeCheckFailCallback | None = (
        TypeCheckConfiguration().typecheck_fail_callback
    ),
    collection_check_strategy: CollectionCheckStrategy = (
        TypeCheckConfiguration().collection_check_strategy
    ),
) -> Any:
    """
    Ensure that ``value`` matches ``expected_type``.

    The types from the :mod:`typing` module do not support :func:`isinstance` or
    :func:`issubclass` so a number of type specific checks are required. This function
    knows which checker to call for which type.

    This function wraps :func:`~.check_type_internal` in the following ways:

    * Respects type checking suppression (:func:`~.suppress_type_checks`)
    * Forms a :class:`~.TypeCheckMemo` from the current stack frame
    * Calls the configured type check fail callback if the check fails

    Note that this function is independent of the globally shared configuration in
    :data:`typeguard.config`. This means that usage within libraries is safe from being
    affected by configuration changes made by other libraries or by the integrating
    application. Instead, configuration options have the same default values as their
    corresponding fields in :class:`TypeCheckConfiguration`.

    :param value: value to be checked against ``expected_type``
    :param expected_type: a class or generic type instance, or a tuple of such things
    :param forward_ref_policy: see :attr:`TypeCheckConfiguration.forward_ref_policy`
    :param typecheck_fail_callback:
        see :attr:`TypeCheckConfiguration.typecheck_fail_callback`
    :param collection_check_strategy:
        see :attr:`TypeCheckConfiguration.collection_check_strategy`
    :return: ``value``, unmodified
    :raises TypeCheckError: if there is a type mismatch

    """
    # A tuple of types is treated as a union of those types
    if type(expected_type) is tuple:
        expected_type = Union[expected_type]

    config = TypeCheckConfiguration(
        forward_ref_policy=forward_ref_policy,
        typecheck_fail_callback=typecheck_fail_callback,
        collection_check_strategy=collection_check_strategy,
    )

    if _suppression.type_checks_suppressed or expected_type is Any:
        return value

    # Use the caller's globals and locals for resolving forward references
    frame = sys._getframe(1)
    memo = TypeCheckMemo(frame.f_globals, frame.f_locals, config=config)
    try:
        check_type_internal(value, expected_type, memo)
    except TypeCheckError as exc:
        exc.append_path_element(qualified_name(value, add_class_prefix=True))
        if config.typecheck_fail_callback:
            config.typecheck_fail_callback(exc, memo)
        else:
            raise

    return value
def check_argument_types(
    func_name: str,
    arguments: dict[str, tuple[Any, Any]],
    memo: TypeCheckMemo,
) -> Literal[True]:
    """
    Check the arguments of a function call against their annotations.

    :param func_name: name of the called function, used in error messages
    :param arguments: mapping of argument name to a ``(value, annotation)`` tuple
    :param memo: memo providing the configuration for the checks
    :return: always ``True``
    :raises TypeCheckError: on a mismatch when no fail callback is configured

    """
    if _suppression.type_checks_suppressed:
        return True

    for argname, (value, annotation) in arguments.items():
        if annotation is NoReturn or annotation is Never:
            exc = TypeCheckError(
                f"{func_name}() was declared never to be called but it was"
            )
            if not memo.config.typecheck_fail_callback:
                raise exc

            memo.config.typecheck_fail_callback(exc, memo)

        try:
            check_type_internal(value, annotation, memo)
        except TypeCheckError as exc:
            qualname = qualified_name(value, add_class_prefix=True)
            exc.append_path_element(f'argument "{argname}" ({qualname})')
            if not memo.config.typecheck_fail_callback:
                raise

            memo.config.typecheck_fail_callback(exc, memo)

    return True
def check_return_type(
    func_name: str,
    retval: T,
    annotation: Any,
    memo: TypeCheckMemo,
) -> T:
    """
    Check a function's return value against its return annotation.

    :param func_name: name of the function, used in error messages
    :param retval: the value being returned
    :param annotation: the declared return type
    :param memo: memo providing the configuration for the check
    :return: ``retval``, unmodified
    :raises TypeCheckError: on a mismatch when no fail callback is configured

    """
    if _suppression.type_checks_suppressed:
        return retval

    if annotation is NoReturn or annotation is Never:
        exc = TypeCheckError(f"{func_name}() was declared never to return but it did")
        if not memo.config.typecheck_fail_callback:
            raise exc

        memo.config.typecheck_fail_callback(exc, memo)

    try:
        check_type_internal(retval, annotation, memo)
    except TypeCheckError as exc:
        # Allow NotImplemented if this is a binary magic method (__eq__() et al)
        if retval is NotImplemented and annotation is bool:
            # This does not (and cannot) check if it's actually a method
            func_name = func_name.rsplit(".", 1)[-1]
            if func_name in BINARY_MAGIC_METHODS:
                return retval

        qualname = qualified_name(retval, add_class_prefix=True)
        exc.append_path_element(f"the return value ({qualname})")
        if not memo.config.typecheck_fail_callback:
            raise

        memo.config.typecheck_fail_callback(exc, memo)

    return retval
def check_send_type(
    func_name: str,
    sendval: T,
    annotation: Any,
    memo: TypeCheckMemo,
) -> T:
    """
    Check a value sent to a generator against the declared send type.

    :param func_name: name of the generator function, used in error messages
    :param sendval: the value that was sent
    :param annotation: the declared send type
    :param memo: memo providing the configuration for the check
    :return: ``sendval``, unmodified
    :raises TypeCheckError: on a mismatch when no fail callback is configured

    """
    if _suppression.type_checks_suppressed:
        return sendval

    if annotation is NoReturn or annotation is Never:
        exc = TypeCheckError(
            f"{func_name}() was declared never to be sent a value to but it was"
        )
        if not memo.config.typecheck_fail_callback:
            raise exc

        memo.config.typecheck_fail_callback(exc, memo)

    try:
        check_type_internal(sendval, annotation, memo)
    except TypeCheckError as exc:
        qualname = qualified_name(sendval, add_class_prefix=True)
        exc.append_path_element(f"the value sent to generator ({qualname})")
        if not memo.config.typecheck_fail_callback:
            raise

        memo.config.typecheck_fail_callback(exc, memo)

    return sendval
def check_yield_type(
    func_name: str,
    yieldval: T,
    annotation: Any,
    memo: TypeCheckMemo,
) -> T:
    """
    Check a value yielded by a generator against the declared yield type.

    :param func_name: name of the generator function, used in error messages
    :param yieldval: the value being yielded
    :param annotation: the declared yield type
    :param memo: memo providing the configuration for the check
    :return: ``yieldval``, unmodified
    :raises TypeCheckError: on a mismatch when no fail callback is configured

    """
    if _suppression.type_checks_suppressed:
        return yieldval

    if annotation is NoReturn or annotation is Never:
        exc = TypeCheckError(f"{func_name}() was declared never to yield but it did")
        if not memo.config.typecheck_fail_callback:
            raise exc

        memo.config.typecheck_fail_callback(exc, memo)

    try:
        check_type_internal(yieldval, annotation, memo)
    except TypeCheckError as exc:
        qualname = qualified_name(yieldval, add_class_prefix=True)
        exc.append_path_element(f"the yielded value ({qualname})")
        if not memo.config.typecheck_fail_callback:
            raise

        memo.config.typecheck_fail_callback(exc, memo)

    return yieldval
def check_variable_assignment(
    value: object, varname: str, annotation: Any, memo: TypeCheckMemo
) -> Any:
    """
    Check a value assigned to an annotated variable.

    :param value: the value being assigned
    :param varname: name of the target variable, used in error messages
    :param annotation: the declared type of the variable
    :param memo: memo providing the configuration for the check
    :return: ``value``, unmodified
    :raises TypeCheckError: on a mismatch when no fail callback is configured

    """
    if _suppression.type_checks_suppressed:
        return value

    try:
        check_type_internal(value, annotation, memo)
    except TypeCheckError as exc:
        qualname = qualified_name(value, add_class_prefix=True)
        exc.append_path_element(f"value assigned to {varname} ({qualname})")
        if not memo.config.typecheck_fail_callback:
            raise

        memo.config.typecheck_fail_callback(exc, memo)

    return value
def check_multi_variable_assignment(
    value: Any, targets: list[dict[str, Any]], memo: TypeCheckMemo
) -> Any:
    """
    Check the values of an assignment that has several targets and/or unpacks a value.

    :param value: the assigned value; it is iterated over unless every target consists
        of a single name
    :param targets: one mapping of variable name to expected type per assignment
        target; a name starting with ``*`` denotes a starred variable
    :param memo: memo providing the configuration for the checks
    :return: the single value if only one value was collected, otherwise the list of
        values obtained by iterating over ``value``
    :raises TypeCheckError: on a mismatch when no fail callback is configured

    """
    if max(len(target) for target in targets) == 1:
        # No unpacking takes place: the value is assigned as a whole
        iterated_values = [value]
    else:
        iterated_values = list(value)

    if not _suppression.type_checks_suppressed:
        for expected_types in targets:
            value_index = 0
            for ann_index, (varname, expected_type) in enumerate(
                expected_types.items()
            ):
                if varname.startswith("*"):
                    # The starred variable receives every value that is not needed
                    # for the names following it
                    varname = varname[1:]
                    keys_left = len(expected_types) - 1 - ann_index
                    next_value_index = len(iterated_values) - keys_left
                    obj: object = iterated_values[value_index:next_value_index]
                    value_index = next_value_index
                else:
                    obj = iterated_values[value_index]
                    value_index += 1

                try:
                    check_type_internal(obj, expected_type, memo)
                except TypeCheckError as exc:
                    qualname = qualified_name(obj, add_class_prefix=True)
                    exc.append_path_element(f"value assigned to {varname} ({qualname})")
                    if memo.config.typecheck_fail_callback:
                        memo.config.typecheck_fail_callback(exc, memo)
                    else:
                        raise

    return iterated_values[0] if len(iterated_values) == 1 else iterated_values
def warn_on_error(exc: TypeCheckError, memo: TypeCheckMemo) -> None:
    """
    Turn a type mismatch into a :class:`TypeCheckWarning` instead of an exception.

    This is intended to be used as an error handler in
    :attr:`TypeCheckConfiguration.typecheck_fail_callback`.

    :param exc: the error describing the mismatch
    :param memo: the memo of the failed check (unused)

    """
    warning = TypeCheckWarning(str(exc))
    warnings.warn(warning, stacklevel=get_stacklevel())
================================================
FILE: metaflow/_vendor/typeguard/_importhook.py
================================================
from __future__ import annotations
import ast
import sys
import types
from collections.abc import Callable, Iterable
from importlib.abc import MetaPathFinder
from importlib.machinery import ModuleSpec, SourceFileLoader
from importlib.util import cache_from_source, decode_source
from inspect import isclass
from os import PathLike
from types import CodeType, ModuleType, TracebackType
from typing import Sequence, TypeVar
from unittest.mock import patch
from ._config import global_config
from ._transformer import TypeguardTransformer
if sys.version_info >= (3, 12):
from collections.abc import Buffer
else:
from metaflow._vendor.typing_extensions import Buffer
if sys.version_info >= (3, 11):
from typing import ParamSpec
else:
from metaflow._vendor.typing_extensions import ParamSpec
if sys.version_info >= (3, 10):
from importlib.metadata import PackageNotFoundError, version
else:
from metaflow._vendor.importlib_metadata import PackageNotFoundError, version
try:
OPTIMIZATION = "typeguard" + "".join(version("typeguard").split(".")[:3])
except PackageNotFoundError:
OPTIMIZATION = "typeguard"
P = ParamSpec("P")
T = TypeVar("T")
# The name of this function is magical
def _call_with_frames_removed(
f: Callable[P, T], *args: P.args, **kwargs: P.kwargs
) -> T:
return f(*args, **kwargs)
def optimized_cache_from_source(path: str, debug_override: bool | None = None) -> str:
    """
    Variant of :func:`importlib.util.cache_from_source` that always passes the
    module-level ``OPTIMIZATION`` marker as the ``optimization`` argument.

    :param path: path to the source file
    :param debug_override: passed through to ``cache_from_source``
    :return: the bytecode cache path computed by ``cache_from_source``

    """
    cache_path = cache_from_source(path, debug_override, optimization=OPTIMIZATION)
    return cache_path
class TypeguardLoader(SourceFileLoader):
    """
    Source file loader that passes module code through :class:`TypeguardTransformer`
    before compiling it.
    """

    @staticmethod
    def source_to_code(
        data: Buffer | str | ast.Module | ast.Expression | ast.Interactive,
        path: Buffer | str | PathLike[str] = "<string>",
    ) -> CodeType:
        """
        Compile ``data`` into a code object with type checking code injected.

        :param data: the module source (text or encoded bytes), or an already parsed
            AST
        :param data: the module source (text or encoded bytes), or an already parsed
            AST
        :param path: the file name recorded in the code object; defaults to
            ``"<string>"`` as in importlib's own ``source_to_code()``
        :return: the compiled, instrumented module code

        """
        if isinstance(data, (ast.Module, ast.Expression, ast.Interactive)):
            tree = data
        else:
            if isinstance(data, str):
                source = data
            else:
                source = decode_source(data)

            tree = _call_with_frames_removed(
                ast.parse,
                source,
                path,
                "exec",
            )

        tree = TypeguardTransformer().visit(tree)
        # Nodes added by the transformer lack position info required by compile()
        ast.fix_missing_locations(tree)

        if global_config.debug_instrumentation and sys.version_info >= (3, 9):
            print(
                f"Source code of {path!r} after instrumentation:\n"
                "----------------------------------------------",
                file=sys.stderr,
            )
            print(ast.unparse(tree), file=sys.stderr)
            print("----------------------------------------------", file=sys.stderr)

        return _call_with_frames_removed(
            compile, tree, path, "exec", 0, dont_inherit=True
        )

    def exec_module(self, module: ModuleType) -> None:
        """
        Execute the module while the bytecode cache path computation is patched to use
        typeguard's own optimization marker (see ``optimized_cache_from_source``).
        """
        # Use a custom optimization marker – the import lock should make this monkey
        # patch safe
        with patch(
            "importlib._bootstrap_external.cache_from_source",
            optimized_cache_from_source,
        ):
            super().exec_module(module)
class TypeguardFinder(MetaPathFinder):
    """
    Wraps another path finder and instruments the module with
    :func:`@typechecked <typeguard.typechecked>` if :meth:`should_instrument` returns
    ``True``.

    Should not be used directly, but rather via :func:`~.install_import_hook`.

    .. versionadded:: 2.6
    """

    def __init__(self, packages: list[str] | None, original_pathfinder: MetaPathFinder):
        self.packages = packages
        self._original_pathfinder = original_pathfinder

    def find_spec(
        self,
        fullname: str,
        path: Sequence[str] | None,
        target: types.ModuleType | None = None,
    ) -> ModuleSpec | None:
        """
        Return a module spec whose loader is a :class:`TypeguardLoader`.

        ``None`` is returned if the module should not be instrumented, the wrapped
        finder cannot find it, or its loader is not a ``SourceFileLoader``.
        """
        if not self.should_instrument(fullname):
            return None

        spec = self._original_pathfinder.find_spec(fullname, path, target)
        if spec is None or not isinstance(spec.loader, SourceFileLoader):
            return None

        spec.loader = TypeguardLoader(spec.loader.name, spec.loader.path)
        return spec

    def should_instrument(self, module_name: str) -> bool:
        """
        Determine whether the module with the given name should be instrumented.

        :param module_name: full name of the module that is about to be imported (e.g.
            ``xyz.abc``)

        """
        if self.packages is None:
            return True

        # Match the package itself or any module below it
        return any(
            module_name == package or module_name.startswith(package + ".")
            for package in self.packages
        )
class ImportHookManager:
    """
    Handle for an installed Typeguard import hook.

    The hook can be removed either by calling :meth:`uninstall` or by using this object
    as a context manager, in which case it is removed when the block is left.
    """

    def __init__(self, hook: MetaPathFinder):
        self.hook = hook

    def __enter__(self) -> None:
        # Nothing to set up
        return None

    def __exit__(
        self,
        exc_type: type[BaseException],
        exc_val: BaseException,
        exc_tb: TracebackType,
    ) -> None:
        self.uninstall()

    def uninstall(self) -> None:
        """Uninstall the import hook."""
        try:
            sys.meta_path.remove(self.hook)
        except ValueError:
            # already removed
            return None
def install_import_hook(
    packages: Iterable[str] | None = None,
    *,
    cls: type[TypeguardFinder] = TypeguardFinder,
) -> ImportHookManager:
    """
    Install an import hook that instruments functions for automatic type checking.

    This only affects modules loaded **after** this hook has been installed.

    :param packages: an iterable of package names to instrument, or ``None`` to
        instrument all packages
    :param cls: a custom meta path finder class
    :return: a context manager that uninstalls the hook on exit (or when you call
        ``.uninstall()``)
    :raises RuntimeError: if ``sys.meta_path`` contains no ``PathFinder`` class

    .. versionadded:: 2.6

    """
    target_packages: list[str] | None
    if packages is None:
        target_packages = None
    else:
        # A lone string is a single package name, not an iterable of names
        target_packages = [packages] if isinstance(packages, str) else list(packages)

    path_finder = next(
        (
            finder
            for finder in sys.meta_path
            if isclass(finder)
            and finder.__name__ == "PathFinder"
            and hasattr(finder, "find_spec")
        ),
        None,
    )
    if path_finder is None:
        raise RuntimeError("Cannot find a PathFinder in sys.meta_path")

    # The hook goes first so that it is consulted before the regular finders
    hook = cls(target_packages, path_finder)
    sys.meta_path.insert(0, hook)
    return ImportHookManager(hook)
================================================
FILE: metaflow/_vendor/typeguard/_memo.py
================================================
from __future__ import annotations
from typing import Any
from metaflow._vendor.typeguard._config import TypeCheckConfiguration, global_config
class TypeCheckMemo:
    """
    Carries the context that type checkers need in order to do their work.

    .. attribute:: globals
       :type: dict[str, Any]

       Global variables used for resolving forward references.

    .. attribute:: locals
       :type: dict[str, Any]

       Local variables used for resolving forward references.

    .. attribute:: self_type
       :type: type | None

       The class object that the first argument (usually named ``self`` or ``cls``)
       refers to, when the type checks run within an instance method or class
       method.

    .. attribute:: config
       :type: TypeCheckConfiguration

       The configuration for this particular set of type checking operations.
    """

    __slots__ = ("globals", "locals", "self_type", "config")

    def __init__(
        self,
        globals: dict[str, Any],
        locals: dict[str, Any],
        *,
        self_type: type | None = None,
        config: TypeCheckConfiguration = global_config,
    ):
        # Namespaces for forward reference resolution
        self.globals, self.locals = globals, locals
        # Optional per-call context
        self.self_type, self.config = self_type, config
================================================
FILE: metaflow/_vendor/typeguard/_pytest_plugin.py
================================================
from __future__ import annotations
import sys
import warnings
from typing import TYPE_CHECKING, Any, Literal
from metaflow._vendor.typeguard._config import CollectionCheckStrategy, ForwardRefPolicy, global_config
from metaflow._vendor.typeguard._exceptions import InstrumentationWarning
from metaflow._vendor.typeguard._importhook import install_import_hook
from metaflow._vendor.typeguard._utils import qualified_name, resolve_reference
if TYPE_CHECKING:
from pytest import Config, Parser
def pytest_addoption(parser: Parser) -> None:
    """
    Register the typeguard command line options and their ini file counterparts.

    Each command line option is immediately followed by a call that registers an ini
    option with the same name (minus the leading dashes) and the same help text.
    """
    group = parser.getgroup("typeguard")

    def add_ini_option(
        opt_type: (
            Literal["string", "paths", "pathlist", "args", "linelist", "bool"] | None
        ),
    ) -> None:
        # Mirror the option that was added to the group most recently
        latest_option = group.options[-1]
        ini_name = latest_option.names()[0][2:]
        parser.addini(ini_name, latest_option.attrs()["help"], opt_type)

    group.addoption(
        "--typeguard-packages",
        action="store",
        help="comma separated name list of packages and modules to instrument for "
        "type checking, or :all: to instrument all modules loaded after typeguard",
    )
    add_ini_option("linelist")

    group.addoption(
        "--typeguard-debug-instrumentation",
        action="store_true",
        help="print all instrumented code to stderr",
    )
    add_ini_option("bool")

    group.addoption(
        "--typeguard-typecheck-fail-callback",
        action="store",
        help=(
            "a module:varname (e.g. typeguard:warn_on_error) reference to a function "
            "that is called (with the exception, and memo object as arguments) to "
            "handle a TypeCheckError"
        ),
    )
    add_ini_option("string")

    group.addoption(
        "--typeguard-forward-ref-policy",
        action="store",
        choices=list(ForwardRefPolicy.__members__),
        help=(
            "determines how to deal with unresolveable forward references in type "
            "annotations"
        ),
    )
    add_ini_option("string")

    group.addoption(
        "--typeguard-collection-check-strategy",
        action="store",
        choices=list(CollectionCheckStrategy.__members__),
        help="determines how thoroughly to check collections (list, dict, etc)",
    )
    add_ini_option("string")
def pytest_configure(config: Config) -> None:
    """
    Apply the typeguard options: install the import hook for the requested packages
    and copy the remaining settings onto the global typeguard configuration.
    """

    def getoption(name: str) -> Any:
        # The command line value wins; a falsy one falls back to the ini value
        cli_value = config.getoption(name.replace("-", "_"))
        return cli_value or config.getini(name)

    packages: list[str] | None
    packages_option = config.getoption("typeguard_packages")
    if packages_option:
        packages = [pkg.strip() for pkg in packages_option.split(",")]
    else:
        packages = config.getini("typeguard-packages") or []

    if packages:
        if packages == [":all:"]:
            # None makes the import hook instrument every module
            packages = None
        else:
            already_imported_packages = sorted(
                [package for package in packages if package in sys.modules]
            )
            if already_imported_packages:
                warnings.warn(
                    f"typeguard cannot check these packages because they are already "
                    f"imported: {', '.join(already_imported_packages)}",
                    InstrumentationWarning,
                    stacklevel=1,
                )

        install_import_hook(packages=packages)

    if getoption("typeguard-debug-instrumentation"):
        global_config.debug_instrumentation = True

    fail_callback_option = getoption("typeguard-typecheck-fail-callback")
    if fail_callback_option:
        callback = resolve_reference(fail_callback_option)
        if not callable(callback):
            raise TypeError(
                f"{fail_callback_option} ({qualified_name(callback.__class__)}) is not "
                f"a callable"
            )

        global_config.typecheck_fail_callback = callback

    forward_ref_policy_option = getoption("typeguard-forward-ref-policy")
    if forward_ref_policy_option:
        global_config.forward_ref_policy = ForwardRefPolicy.__members__[
            forward_ref_policy_option
        ]

    collection_check_strategy_option = getoption("typeguard-collection-check-strategy")
    if collection_check_strategy_option:
        global_config.collection_check_strategy = CollectionCheckStrategy.__members__[
            collection_check_strategy_option
        ]
================================================
FILE: metaflow/_vendor/typeguard/_suppression.py
================================================
from __future__ import annotations
import sys
from collections.abc import Callable, Generator
from contextlib import contextmanager
from functools import update_wrapper
from threading import Lock
from typing import ContextManager, TypeVar, overload
if sys.version_info >= (3, 10):
from typing import ParamSpec
else:
from metaflow._vendor.typing_extensions import ParamSpec
P = ParamSpec("P")
T = TypeVar("T")
type_checks_suppressed = 0
type_checks_suppress_lock = Lock()
@overload
def suppress_type_checks(func: Callable[P, T]) -> Callable[P, T]: ...
@overload
def suppress_type_checks() -> ContextManager[None]: ...
def suppress_type_checks(
func: Callable[P, T] | None = None,
) -> Callable[P, T] | ContextManager[None]:
"""
Temporarily suppress all type checking.
This function has two operating modes, based on how it's used:
#. as a context manager (``with suppress_type_checks(): ...``)
#. as a decorator (``@suppress_type_checks``)
When used as a context manager, :func:`check_type` and any automatically
instrumented functions skip the actual type checking. These context managers can be
nested.
When used as a decorator, all type checking is suppressed while the function is
running.
Type checking will resume once no more context managers are active and no decorated
functions are running.
Both operating modes are thread-safe.
"""
def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
global type_checks_suppressed
with type_checks_suppress_lock:
type_checks_suppressed += 1
assert func is not None
try:
return func(*args, **kwargs)
finally:
with type_checks_suppress_lock:
type_checks_suppressed -= 1
def cm() -> Generator[None, None, None]:
global type_checks_suppressed
with type_checks_suppress_lock:
type_checks_suppressed += 1
try:
yield
finally:
with type_checks_suppress_lock:
type_checks_suppressed -= 1
if func is None:
# Context manager mode
return contextmanager(cm)()
else:
# Decorator mode
update_wrapper(wrapper, func)
return wrapper
================================================
FILE: metaflow/_vendor/typeguard/_transformer.py
================================================
from __future__ import annotations
import ast
import builtins
import sys
import typing
from ast import (
AST,
Add,
AnnAssign,
Assign,
AsyncFunctionDef,
Attribute,
AugAssign,
BinOp,
BitAnd,
BitOr,
BitXor,
Call,
ClassDef,
Constant,
Dict,
Div,
Expr,
Expression,
FloorDiv,
FunctionDef,
If,
Import,
ImportFrom,
Index,
List,
Load,
LShift,
MatMult,
Mod,
Module,
Mult,
Name,
NamedExpr,
NodeTransformer,
NodeVisitor,
Pass,
Pow,
Return,
RShift,
Starred,
Store,
Sub,
Subscript,
Tuple,
Yield,
YieldFrom,
alias,
copy_location,
expr,
fix_missing_locations,
keyword,
walk,
)
from collections import defaultdict
from collections.abc import Generator, Sequence
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import dataclass, field
from typing import Any, ClassVar, cast, overload
# Fully qualified names of annotations that may describe the return value of a
# generator function; the return annotation of a function containing yields is
# matched against these to extract its yield/send/return types
generator_names = (
    "typing.Generator",
    "collections.abc.Generator",
    "typing.Iterator",
    "collections.abc.Iterator",
    "typing.Iterable",
    "collections.abc.Iterable",
    "typing.AsyncIterator",
    "collections.abc.AsyncIterator",
    "typing.AsyncIterable",
    "collections.abc.AsyncIterable",
    "typing.AsyncGenerator",
    "collections.abc.AsyncGenerator",
)
# Fully qualified names that are treated as "Any" when converting annotations
anytype_names = (
    "typing.Any",
    "typing_extensions.Any",
)
# Fully qualified names of Literal; such annotations are left unprocessed
literal_names = (
    "typing.Literal",
    "typing_extensions.Literal",
)
# Fully qualified names of Annotated; only its first argument is converted
annotated_names = (
    "typing.Annotated",
    "typing_extensions.Annotated",
)
# Decorators that make whole-module instrumentation skip a function
ignore_decorators = (
    "typing.no_type_check",
    "typeguard.typeguard_ignore",
)
# Maps augmented assignment operator node types to in-place operation names
# NOTE(review): presumably these name the matching ``operator`` module functions;
# confirm at the point of use, which is outside this part of the file
aug_assign_functions = {
    Add: "iadd",
    Sub: "isub",
    Mult: "imul",
    MatMult: "imatmul",
    Div: "itruediv",
    FloorDiv: "ifloordiv",
    Mod: "imod",
    Pow: "ipow",
    LShift: "ilshift",
    RShift: "irshift",
    BitAnd: "iand",
    BitXor: "ixor",
    BitOr: "ior",
}
@dataclass
class TransformMemo:
    """
    Bookkeeping for one scope (module, class or function) that is being transformed.

    Each memo links to the memo of the enclosing scope through ``parent``. The name
    lookups (:meth:`get_unused_name`, :meth:`is_ignored_name` and
    :meth:`name_matches`) walk that chain outwards.
    """

    # AST node of the scope described by this memo (None for a placeholder memo)
    node: Module | ClassDef | FunctionDef | AsyncFunctionDef | None
    # Memo of the enclosing scope, if any
    parent: TransformMemo | None
    # Names of the enclosing classes/functions plus this one, outermost first
    path: tuple[str, ...]
    # Dotted, ``__qualname__`` style name of this scope; computed in __post_init__
    joined_path: Constant = field(init=False)
    # Converted annotations for the values returned from, yielded from and sent to
    # the function
    return_annotation: expr | None = None
    yield_annotation: expr | None = None
    send_annotation: expr | None = None
    is_async: bool = False
    # Names known to be taken in this scope
    local_names: set[str] = field(init=False, default_factory=set)
    # Local name -> fully qualified name of what was imported under that name
    imported_names: dict[str, str] = field(init=False, default_factory=dict)
    # Names whose use in an annotation causes that annotation to be ignored
    ignored_names: set[str] = field(init=False, default_factory=set)
    # Module name -> {imported name -> Name node holding the local alias} for the
    # imports that the injected code needs
    load_names: defaultdict[str, dict[str, Name]] = field(
        init=False, default_factory=lambda: defaultdict(dict)
    )
    has_yield_expressions: bool = field(init=False, default=False)
    has_return_expressions: bool = field(init=False, default=False)
    # Name node referring to the call memo variable; created on first use
    memo_var_name: Name | None = field(init=False, default=None)
    should_instrument: bool = field(init=False, default=True)
    # Variable name -> converted annotation
    variable_annotations: dict[str, expr] = field(init=False, default_factory=dict)
    # Keyword name -> value node, turned into configuration override keywords
    configuration_overrides: dict[str, Any] = field(init=False, default_factory=dict)
    # Index in the node's body at which instrumentation code gets inserted
    code_inject_index: int = field(init=False, default=0)

    def __post_init__(self) -> None:
        """Compute ``joined_path`` and ``code_inject_index`` from the scope node."""
        elements: list[str] = []
        memo = self
        while isinstance(memo.node, (ClassDef, FunctionDef, AsyncFunctionDef)):
            elements.insert(0, memo.node.name)
            if not memo.parent:
                break

            memo = memo.parent
            if isinstance(memo.node, (FunctionDef, AsyncFunctionDef)):
                # Same marker that __qualname__ uses for names defined inside a
                # function (an empty element would render as "outer..inner")
                elements.insert(0, "<locals>")

        self.joined_path = Constant(".".join(elements))

        # Figure out where to insert instrumentation code
        if self.node:
            # If the body consists of nothing but __future__ imports and string
            # expression statements, the code has to go after all of them. Index 0
            # would put it in front of the docstring, which would then no longer be
            # a docstring.
            self.code_inject_index = len(self.node.body)
            for index, child in enumerate(self.node.body):
                if isinstance(child, ImportFrom) and child.module == "__future__":
                    # (module only) __future__ imports must come first
                    continue
                elif (
                    isinstance(child, Expr)
                    and isinstance(child.value, Constant)
                    and isinstance(child.value.value, str)
                ):
                    continue  # docstring

                self.code_inject_index = index
                break

    def get_unused_name(self, name: str) -> str:
        """
        Reserve and return a name that is not taken in this or any enclosing scope.

        Underscores are appended to ``name`` until it no longer collides with a name
        recorded in ``local_names`` of this memo or one of its ancestors.

        :param name: the preferred name
        :return: the name that was reserved in this scope
        """
        memo: TransformMemo | None = self
        while memo is not None:
            if name in memo.local_names:
                # Collision: change the candidate and start over from this scope
                memo = self
                name += "_"
            else:
                memo = memo.parent

        self.local_names.add(name)
        return name

    def is_ignored_name(self, expression: expr | Expr | None) -> bool:
        """
        Tell whether the expression refers to a name ignored in this scope chain.

        Only a bare name (``x``) or an attribute of a bare name (``x.y``) is
        considered; the decision is based on the bare name.
        """
        top_expression = (
            expression.value if isinstance(expression, Expr) else expression
        )

        if isinstance(top_expression, Attribute) and isinstance(
            top_expression.value, Name
        ):
            name = top_expression.value.id
        elif isinstance(top_expression, Name):
            name = top_expression.id
        else:
            return False

        memo: TransformMemo | None = self
        while memo is not None:
            if name in memo.ignored_names:
                return True

            memo = memo.parent

        return False

    def get_memo_name(self) -> Name:
        """Return the Name node for the call memo variable, creating it if needed."""
        if not self.memo_var_name:
            self.memo_var_name = Name(id="memo", ctx=Load())

        return self.memo_var_name

    def get_import(self, module: str, name: str) -> Name:
        """
        Return a Name node through which injected code can refer to ``module.name``.

        An import that was already requested, or a matching import present in the
        code itself, is reused. Otherwise a new import is recorded for
        :meth:`insert_imports`, under an alias that does not clash with other names.
        """
        if module in self.load_names and name in self.load_names[module]:
            return self.load_names[module][name]

        qualified_name = f"{module}.{name}"
        if name in self.imported_names and self.imported_names[name] == qualified_name:
            return Name(id=name, ctx=Load())

        alias = self.get_unused_name(name)
        node = self.load_names[module][name] = Name(id=alias, ctx=Load())
        self.imported_names[name] = qualified_name
        return node

    def insert_imports(self, node: Module | FunctionDef | AsyncFunctionDef) -> None:
        """Insert imports needed by injected code."""
        if not self.load_names:
            return

        # Insert imports after any "from __future__ ..." imports and any docstring
        for modulename, names in self.load_names.items():
            aliases = [
                alias(orig_name, new_name.id if orig_name != new_name.id else None)
                for orig_name, new_name in sorted(names.items())
            ]
            node.body.insert(self.code_inject_index, ImportFrom(modulename, aliases, 0))

    def name_matches(self, expression: expr | Expr | None, *names: str) -> bool:
        """
        Tell whether the expression refers to one of the given qualified names.

        Subscripts and calls are reduced to the subscripted/called expression. The
        leftmost name is translated through the imports recorded in this scope (or
        prefixed with ``builtins.`` if it is a builtin). If that yields no match, the
        enclosing scopes are tried in turn.

        :param expression: the expression to examine
        :param names: fully qualified names to compare against
        """
        if expression is None:
            return False

        path: list[str] = []
        top_expression = (
            expression.value if isinstance(expression, Expr) else expression
        )

        if isinstance(top_expression, Subscript):
            top_expression = top_expression.value
        elif isinstance(top_expression, Call):
            top_expression = top_expression.func

        while isinstance(top_expression, Attribute):
            path.insert(0, top_expression.attr)
            top_expression = top_expression.value

        if not isinstance(top_expression, Name):
            return False

        if top_expression.id in self.imported_names:
            translated = self.imported_names[top_expression.id]
        elif hasattr(builtins, top_expression.id):
            translated = "builtins." + top_expression.id
        else:
            translated = top_expression.id

        path.insert(0, translated)
        joined_path = ".".join(path)
        if joined_path in names:
            return True
        elif self.parent:
            return self.parent.name_matches(expression, *names)
        else:
            return False

    def get_config_keywords(self) -> list[keyword]:
        """
        Build the keyword nodes for the configuration overrides of this scope.

        Overrides stored on a directly enclosing class are included, with the ones
        stored on this memo taking precedence.
        """
        if self.parent and isinstance(self.parent.node, ClassDef):
            overrides = self.parent.configuration_overrides.copy()
        else:
            overrides = {}

        overrides.update(self.configuration_overrides)
        return [keyword(key, value) for key, value in overrides.items()]
class NameCollector(NodeVisitor):
    """
    Collects the names bound by imports, plain assignments and assignment
    expressions, without descending into function or class definitions.
    """

    def __init__(self) -> None:
        self.names: set[str] = set()

    def visit_Import(self, node: Import) -> None:
        self.names.update(item.asname or item.name for item in node.names)

    def visit_ImportFrom(self, node: ImportFrom) -> None:
        self.names.update(item.asname or item.name for item in node.names)

    def visit_Assign(self, node: Assign) -> None:
        # Only simple name targets count; the assigned value is not examined
        self.names.update(
            target.id for target in node.targets if isinstance(target, Name)
        )

    def visit_NamedExpr(self, node: NamedExpr) -> Any:
        target = node.target
        if isinstance(target, Name):
            self.names.add(target.id)

    def visit_FunctionDef(self, node: FunctionDef) -> None:
        # Names bound inside a function definition are deliberately not collected
        return None

    def visit_ClassDef(self, node: ClassDef) -> None:
        # Names bound inside a class definition are deliberately not collected
        return None
class GeneratorDetector(NodeVisitor):
    """
    Detects if a function node is a generator function.

    Only the first function visited is examined: function and class definitions
    nested inside it are skipped.
    """

    contains_yields: bool = False
    in_root_function: bool = False

    def visit_Yield(self, node: Yield) -> Any:
        self.contains_yields = True

    def visit_YieldFrom(self, node: YieldFrom) -> Any:
        self.contains_yields = True

    def visit_ClassDef(self, node: ClassDef) -> Any:
        # Yields inside a nested class belong to other functions
        return None

    def visit_FunctionDef(self, node: FunctionDef | AsyncFunctionDef) -> Any:
        if self.in_root_function:
            # A nested function: its yields do not count
            return

        self.in_root_function = True
        self.generic_visit(node)
        self.in_root_function = False

    def visit_AsyncFunctionDef(self, node: AsyncFunctionDef) -> Any:
        self.visit_FunctionDef(node)
class AnnotationTransformer(NodeTransformer):
    """
    Rewrites an annotation expression for use in injected type checking code.

    String constants are parsed and replaced with the resulting expression, names
    ignored in the current scope and (at the top level) variations of ``Any`` are
    erased by returning ``None``, and on older Python versions PEP 604 unions and
    builtin collection names are replaced with their ``typing`` equivalents.
    """

    # Builtin collection names and the (module, name) of the replacement that is
    # imported for them on Python < 3.9
    type_substitutions: ClassVar[dict[str, tuple[str, str]]] = {
        "builtins.dict": ("typing", "Dict"),
        "builtins.list": ("typing", "List"),
        "builtins.tuple": ("typing", "Tuple"),
        "builtins.set": ("typing", "Set"),
        "builtins.frozenset": ("typing", "FrozenSet"),
    }

    def __init__(self, transformer: TypeguardTransformer):
        self.transformer = transformer
        self._memo = transformer._memo
        # Nesting depth of visit() calls; 0 outside of any visit() call
        self._level = 0

    def visit(self, node: AST) -> Any:
        """
        Visit a node, returning the replacement node or ``None`` if it was erased.
        """
        # Don't process Literals
        if isinstance(node, expr) and self._memo.name_matches(node, *literal_names):
            return node

        self._level += 1
        new_node = super().visit(node)
        self._level -= 1

        # An Expression wrapper whose body got erased counts as erased itself
        if isinstance(new_node, Expression) and not hasattr(new_node, "body"):
            return None

        # Return None if this new node matches a variation of typing.Any
        if (
            self._level == 0
            and isinstance(new_node, expr)
            and self._memo.name_matches(new_node, *anytype_names)
        ):
            return None

        return new_node

    def visit_BinOp(self, node: BinOp) -> Any:
        """Handle ``x | y`` unions; other binary operations are returned as they are."""
        self.generic_visit(node)

        if isinstance(node.op, BitOr):
            # If either branch of the BinOp has been transformed to `None`, it means
            # that a type in the union was ignored, so the entire annotation should be
            # ignored
            if not hasattr(node, "left") or not hasattr(node, "right"):
                return None

            # Return Any if either side is Any
            if self._memo.name_matches(node.left, *anytype_names):
                return node.left
            elif self._memo.name_matches(node.right, *anytype_names):
                return node.right

            # The ``|`` operator is replaced with typing.Union[...] on Python < 3.10
            if sys.version_info < (3, 10):
                union_name = self.transformer._get_import("typing", "Union")
                return Subscript(
                    value=union_name,
                    slice=Index(
                        Tuple(elts=[node.left, node.right], ctx=Load()), ctx=Load()
                    ),
                    ctx=Load(),
                )

        return node

    def visit_Attribute(self, node: Attribute) -> Any:
        """Erase attribute accesses on ignored names; attributes are not descended into."""
        if self._memo.is_ignored_name(node):
            return None

        return node

    def visit_Subscript(self, node: Subscript) -> Any:
        """
        Convert the arguments of a subscripted annotation such as ``List[int]``.

        Returns ``None`` to erase the annotation, the bare subscripted value if all
        arguments were erased, or the (modified) node itself.
        """
        if self._memo.is_ignored_name(node.value):
            return None

        # The subscript of typing(_extensions).Literal can be any arbitrary string, so
        # don't try to evaluate it as code
        if node.slice:
            if isinstance(node.slice, Index):
                # Python 3.8
                slice_value = node.slice.value  # type: ignore[attr-defined]
            else:
                slice_value = node.slice

            if isinstance(slice_value, Tuple):
                if self._memo.name_matches(node.value, *annotated_names):
                    # Only treat the first argument to typing.Annotated as a potential
                    # forward reference
                    items = cast(
                        typing.List[expr],
                        [self.visit(slice_value.elts[0])] + slice_value.elts[1:],
                    )
                else:
                    items = cast(
                        typing.List[expr],
                        [self.visit(item) for item in slice_value.elts],
                    )

                # If this is a Union and any of the items is Any, erase the entire
                # annotation
                if self._memo.name_matches(node.value, "typing.Union") and any(
                    item is None
                    or (
                        isinstance(item, expr)
                        and self._memo.name_matches(item, *anytype_names)
                    )
                    for item in items
                ):
                    return None

                # If all items in the subscript were Any, erase the subscript entirely
                if all(item is None for item in items):
                    return node.value

                # Erased items are replaced with typing.Any to keep the item count
                for index, item in enumerate(items):
                    if item is None:
                        items[index] = self.transformer._get_import("typing", "Any")

                slice_value.elts = items
            else:
                self.generic_visit(node)

                # If the transformer erased the slice entirely, just return the node
                # value without the subscript (unless it's Optional, in which case erase
                # the node entirely)
                if self._memo.name_matches(
                    node.value, "typing.Optional"
                ) and not hasattr(node, "slice"):
                    return None
                if sys.version_info >= (3, 9) and not hasattr(node, "slice"):
                    return node.value
                elif sys.version_info < (3, 9) and not hasattr(node.slice, "value"):
                    return node.value

        return node

    def visit_Name(self, node: Name) -> Any:
        """Erase ignored names and, on Python < 3.9, substitute builtin collections."""
        if self._memo.is_ignored_name(node):
            return None

        if sys.version_info < (3, 9):
            for typename, substitute in self.type_substitutions.items():
                if self._memo.name_matches(node, typename):
                    new_node = self.transformer._get_import(*substitute)
                    return copy_location(new_node, node)

        return node

    def visit_Call(self, node: Call) -> Any:
        # Don't recurse into calls
        return node

    def visit_Constant(self, node: Constant) -> Any:
        """Replace a string constant with the converted expression parsed from it."""
        if isinstance(node.value, str):
            expression = ast.parse(node.value, mode="eval")
            new_node = self.visit(expression)
            if new_node:
                return copy_location(new_node.body, node)
            else:
                return None

        return node
class TypeguardTransformer(NodeTransformer):
def __init__(
    self, target_path: Sequence[str] | None = None, target_lineno: int | None = None
) -> None:
    """
    :param target_path: names leading to the single function to instrument
        (outermost first); if omitted or empty, no single target is selected
    :param target_lineno: line number that the first line of the target function
        (its first decorator, if any) is compared against
    """
    self._target_path = tuple(target_path) if target_path else None

    # Until a module is visited, a placeholder memo serves as both the current
    # and the module level memo
    placeholder_memo = TransformMemo(None, None, ())
    self._memo = placeholder_memo
    self._module_memo = placeholder_memo

    self.target_lineno = target_lineno
    self.target_node: FunctionDef | AsyncFunctionDef | None = None
    self.names_used_in_annotations: set[str] = set()
def generic_visit(self, node: AST) -> AST:
has_non_empty_body_initially = bool(getattr(node, "body", None))
initial_type = type(node)
node = super().generic_visit(node)
if (
type(node) is initial_type
and has_non_empty_body_initially
and hasattr(node, "body")
and not node.body
):
# If we have still the same node type after transformation
# but we've optimised it's body away, we add a `pass` statement.
node.body = [Pass()]
return node
@contextmanager
def _use_memo(
    self, node: ClassDef | FunctionDef | AsyncFunctionDef
) -> Generator[None, Any, None]:
    """
    Make a fresh memo for the given class or function current within the block.

    For a function that should be instrumented, the yield, send and return
    annotations are derived from its return annotation. The previous memo is put
    back when the block is left, whether normally or because of an exception.

    :param node: the class or function definition being entered
    """
    old_memo = self._memo
    new_memo = TransformMemo(node, old_memo, old_memo.path + (node.name,))
    self._memo = new_memo

    # The try block also covers the setup code, as converting annotations may
    # raise after the new memo has already been made current
    try:
        if isinstance(node, (FunctionDef, AsyncFunctionDef)):
            new_memo.should_instrument = (
                self._target_path is None or new_memo.path == self._target_path
            )
            if new_memo.should_instrument:
                # Check if the function is a generator function
                detector = GeneratorDetector()
                detector.visit(node)

                # Extract yield, send and return types where possible from a
                # subscripted annotation like Generator[int, str, bool]
                return_annotation = deepcopy(node.returns)
                if detector.contains_yields and new_memo.name_matches(
                    return_annotation, *generator_names
                ):
                    if isinstance(return_annotation, Subscript):
                        annotation_slice = return_annotation.slice

                        # Python < 3.9
                        if isinstance(annotation_slice, Index):
                            annotation_slice = (
                                annotation_slice.value  # type: ignore[attr-defined]
                            )

                        if isinstance(annotation_slice, Tuple):
                            items = annotation_slice.elts
                        else:
                            items = [annotation_slice]

                        if len(items) > 0:
                            new_memo.yield_annotation = self._convert_annotation(
                                items[0]
                            )

                        if len(items) > 1:
                            new_memo.send_annotation = self._convert_annotation(
                                items[1]
                            )

                        if len(items) > 2:
                            new_memo.return_annotation = self._convert_annotation(
                                items[2]
                            )
                else:
                    new_memo.return_annotation = self._convert_annotation(
                        return_annotation
                    )

        if isinstance(node, AsyncFunctionDef):
            new_memo.is_async = True

        yield
    finally:
        self._memo = old_memo
def _get_import(self, module: str, name: str) -> Name:
memo = self._memo if self._target_path else self._module_memo
return memo.get_import(module, name)
@overload
def _convert_annotation(self, annotation: None) -> None: ...

@overload
def _convert_annotation(self, annotation: expr) -> expr: ...

def _convert_annotation(self, annotation: expr | None) -> expr | None:
    """
    Convert an annotation into the form used by the injected checks.

    The names used in the converted annotation are recorded in
    ``names_used_in_annotations``.

    :param annotation: the annotation expression, or ``None`` if there is none
    :return: the converted annotation, or ``None`` if there was no annotation or
        the conversion erased it
    """
    if annotation is None:
        return None

    # Convert PEP 604 unions (x | y) and generic built-in collections where
    # necessary, and undo forward references
    converted = cast(expr, AnnotationTransformer(self).visit(annotation))
    if not isinstance(converted, expr):
        return converted

    converted = ast.copy_location(converted, annotation)

    # Store names used in the annotation
    self.names_used_in_annotations.update(
        node.id for node in walk(converted) if isinstance(node, Name)
    )
    return converted
def visit_Name(self, node: Name) -> Name:
    """Record the name as taken in the current scope; the node is left as it is."""
    names_in_scope = self._memo.local_names
    names_in_scope.add(node.id)
    return node
def visit_Module(self, node: Module) -> Module:
    """
    Instrument a whole module.

    A new module level memo is installed, the module is transformed, the imports
    required by the injected code are inserted, and missing location information
    is filled in.
    """
    module_memo = TransformMemo(node, None, ())
    self._module_memo = module_memo
    self._memo = module_memo

    self.generic_visit(node)
    self._module_memo.insert_imports(node)
    fix_missing_locations(node)
    return node
def visit_Import(self, node: Import) -> Import:
    """Record the names bound by an ``import ...`` statement in the current memo."""
    memo = self._memo
    for imported in node.names:
        bound_name = imported.asname or imported.name
        memo.local_names.add(bound_name)
        memo.imported_names[bound_name] = imported.name

    return node
def visit_ImportFrom(self, node: ImportFrom) -> ImportFrom:
    """
    Record the names bound by a ``from ... import ...`` statement in the current
    memo. Star imports are not recorded.
    """
    memo = self._memo
    for name in node.names:
        if name.name == "*":
            continue

        bound_name = name.asname or name.name
        memo.local_names.add(bound_name)
        memo.imported_names[bound_name] = f"{node.module}.{name.name}"

    return node
def visit_ClassDef(self, node: ClassDef) -> ClassDef | None:
    """
    Transform a class definition.

    ``@typechecked`` decorators are removed from the class and their keyword
    arguments are stored as configuration overrides in the memo of the class.

    :return: the class node, or ``None`` if the class is at the top level and is
        not part of the target path
    """
    self._memo.local_names.add(node.name)

    # Eliminate top level classes not belonging to the target path
    target_path = self._target_path
    if target_path is not None and not self._memo.path:
        if node.name != target_path[0]:
            return None

    with self._use_memo(node):
        typechecked_decorators = [
            decorator
            for decorator in node.decorator_list
            if self._memo.name_matches(decorator, "typeguard.typechecked")
        ]
        for decorator in typechecked_decorators:
            # Remove the decorator to prevent duplicate instrumentation
            node.decorator_list.remove(decorator)

            # Store any configuration overrides
            if isinstance(decorator, Call) and decorator.keywords:
                overrides = {kw.arg: kw.value for kw in decorator.keywords if kw.arg}
                self._memo.configuration_overrides.update(overrides)

        self.generic_visit(node)
        return node
def visit_FunctionDef(
    self, node: FunctionDef | AsyncFunctionDef
) -> FunctionDef | AsyncFunctionDef | None:
    """
    Injects type checks for function arguments, and for a return of None if the
    function is annotated to return something else than Any or None, and the body
    ends without an explicit "return".

    Also inserts the code that creates the call memo (and the imports it needs)
    whenever any injected check refers to the memo.

    :return: the function node, or ``None`` if the function was eliminated (a top
        level function outside the target path, or an overload)
    """
    self._memo.local_names.add(node.name)

    # Eliminate top level functions not belonging to the target path
    if (
        self._target_path is not None
        and not self._memo.path
        and node.name != self._target_path[0]
    ):
        return None

    # Skip instrumentation if we're instrumenting the whole module and the function
    # contains either @no_type_check or @typeguard_ignore
    if self._target_path is None:
        for decorator in node.decorator_list:
            if self._memo.name_matches(decorator, *ignore_decorators):
                return node

    # Positional parameters in declaration order. The first one is what a method
    # receives as "self"/"cls", even if it was declared positional-only
    # (e.g. "def method(self, /, x)"), in which case it is not in node.args.args.
    positional_args = node.args.posonlyargs + node.args.args

    with self._use_memo(node):
        arg_annotations: dict[str, Any] = {}
        if self._target_path is None or self._memo.path == self._target_path:
            # Find line number we're supposed to match against
            if node.decorator_list:
                first_lineno = node.decorator_list[0].lineno
            else:
                first_lineno = node.lineno

            for decorator in node.decorator_list.copy():
                if self._memo.name_matches(decorator, "typing.overload"):
                    # Remove overloads entirely
                    return None
                elif self._memo.name_matches(decorator, "typeguard.typechecked"):
                    # Remove the decorator to prevent duplicate instrumentation
                    node.decorator_list.remove(decorator)

                    # Store any configuration overrides
                    if isinstance(decorator, Call) and decorator.keywords:
                        self._memo.configuration_overrides = {
                            kw.arg: kw.value for kw in decorator.keywords if kw.arg
                        }

            if self.target_lineno == first_lineno:
                assert self.target_node is None
                self.target_node = node
                # Removing decorators above may have changed the first line
                if node.decorator_list:
                    self.target_lineno = node.decorator_list[0].lineno
                else:
                    self.target_lineno = node.lineno

            all_args = node.args.args + node.args.kwonlyargs + node.args.posonlyargs

            # Ensure that any type shadowed by the positional or keyword-only
            # argument names are ignored in this function
            for arg in all_args:
                self._memo.ignored_names.add(arg.arg)

            # Ensure that any type shadowed by the variable positional argument name
            # (e.g. "args" in *args) is ignored in this function
            if node.args.vararg:
                self._memo.ignored_names.add(node.args.vararg.arg)

            # Ensure that any type shadowed by the variable keyword argument name
            # (e.g. "kwargs" in **kwargs) is ignored in this function
            if node.args.kwarg:
                self._memo.ignored_names.add(node.args.kwarg.arg)

            for arg in all_args:
                annotation = self._convert_annotation(deepcopy(arg.annotation))
                if annotation:
                    arg_annotations[arg.arg] = annotation

            if node.args.vararg:
                # Convert a copy, like for the other parameters, so that the
                # annotation in the function signature is left untouched
                annotation_ = self._convert_annotation(
                    deepcopy(node.args.vararg.annotation)
                )
                if annotation_:
                    if sys.version_info >= (3, 9):
                        container = Name("tuple", ctx=Load())
                    else:
                        container = self._get_import("typing", "Tuple")

                    # *args: T is checked as tuple[T, ...]
                    subscript_slice: Tuple | Index = Tuple(
                        [
                            annotation_,
                            Constant(Ellipsis),
                        ],
                        ctx=Load(),
                    )
                    if sys.version_info < (3, 9):
                        subscript_slice = Index(subscript_slice, ctx=Load())

                    arg_annotations[node.args.vararg.arg] = Subscript(
                        container, subscript_slice, ctx=Load()
                    )

            if node.args.kwarg:
                annotation_ = self._convert_annotation(
                    deepcopy(node.args.kwarg.annotation)
                )
                if annotation_:
                    if sys.version_info >= (3, 9):
                        container = Name("dict", ctx=Load())
                    else:
                        container = self._get_import("typing", "Dict")

                    # **kwargs: T is checked as dict[str, T]
                    subscript_slice = Tuple(
                        [
                            Name("str", ctx=Load()),
                            annotation_,
                        ],
                        ctx=Load(),
                    )
                    if sys.version_info < (3, 9):
                        subscript_slice = Index(subscript_slice, ctx=Load())

                    arg_annotations[node.args.kwarg.arg] = Subscript(
                        container, subscript_slice, ctx=Load()
                    )

            if arg_annotations:
                self._memo.variable_annotations.update(arg_annotations)

        self.generic_visit(node)

        if arg_annotations:
            annotations_dict = Dict(
                keys=[Constant(key) for key in arg_annotations.keys()],
                values=[
                    Tuple([Name(key, ctx=Load()), annotation], ctx=Load())
                    for key, annotation in arg_annotations.items()
                ],
            )
            func_name = self._get_import(
                "typeguard._functions", "check_argument_types"
            )
            args = [
                self._memo.joined_path,
                annotations_dict,
                self._memo.get_memo_name(),
            ]
            node.body.insert(
                self._memo.code_inject_index, Expr(Call(func_name, args, []))
            )

        # Add a checked "return None" to the end if there's no explicit return
        # Skip if the return annotation is None or Any
        if (
            self._memo.return_annotation
            and (not self._memo.is_async or not self._memo.has_yield_expressions)
            and not isinstance(node.body[-1], Return)
            and (
                not isinstance(self._memo.return_annotation, Constant)
                or self._memo.return_annotation.value is not None
            )
        ):
            func_name = self._get_import("typeguard._functions", "check_return_type")
            return_node = Return(
                Call(
                    func_name,
                    [
                        self._memo.joined_path,
                        Constant(None),
                        self._memo.return_annotation,
                        self._memo.get_memo_name(),
                    ],
                    [],
                )
            )

            # Replace a placeholder "pass" at the end
            if isinstance(node.body[-1], Pass):
                copy_location(return_node, node.body[-1])
                del node.body[-1]

            node.body.append(return_node)

        # Insert code to create the call memo, if it was ever needed for this
        # function
        if self._memo.memo_var_name:
            memo_kwargs: dict[str, Any] = {}
            if self._memo.parent and isinstance(self._memo.parent.node, ClassDef):
                for decorator in node.decorator_list:
                    if (
                        isinstance(decorator, Name)
                        and decorator.id == "staticmethod"
                    ):
                        # Static methods have no "self" type
                        break
                    elif (
                        isinstance(decorator, Name)
                        and decorator.id == "classmethod"
                    ):
                        # The first argument is the class itself
                        if positional_args:
                            memo_kwargs["self_type"] = Name(
                                id=positional_args[0].arg, ctx=Load()
                            )

                        break
                else:
                    if positional_args:
                        if node.name == "__new__":
                            # __new__() receives the class as its first argument
                            memo_kwargs["self_type"] = Name(
                                id=positional_args[0].arg, ctx=Load()
                            )
                        else:
                            memo_kwargs["self_type"] = Attribute(
                                Name(id=positional_args[0].arg, ctx=Load()),
                                "__class__",
                                ctx=Load(),
                            )

            config_keywords = self._memo.get_config_keywords()
            if config_keywords:
                memo_kwargs["config"] = Call(
                    self._get_import("dataclasses", "replace"),
                    [self._get_import("typeguard._config", "global_config")],
                    config_keywords,
                )

            # Only now pick the final name of the memo variable, as all the names
            # used in the function body are known at this point
            self._memo.memo_var_name.id = self._memo.get_unused_name("memo")
            memo_store_name = Name(id=self._memo.memo_var_name.id, ctx=Store())
            globals_call = Call(Name(id="globals", ctx=Load()), [], [])
            locals_call = Call(Name(id="locals", ctx=Load()), [], [])
            memo_expr = Call(
                self._get_import("typeguard", "TypeCheckMemo"),
                [globals_call, locals_call],
                [keyword(key, value) for key, value in memo_kwargs.items()],
            )
            node.body.insert(
                self._memo.code_inject_index,
                Assign([memo_store_name], memo_expr),
            )

            self._memo.insert_imports(node)

            # Special case the __new__() method to create a local alias from the
            # class name to the first argument (usually "cls")
            if (
                isinstance(node, FunctionDef)
                and positional_args
                and self._memo.parent is not None
                and isinstance(self._memo.parent.node, ClassDef)
                and node.name == "__new__"
            ):
                first_args_expr = Name(positional_args[0].arg, ctx=Load())
                cls_name = Name(self._memo.parent.node.name, ctx=Store())
                node.body.insert(
                    self._memo.code_inject_index,
                    Assign([cls_name], first_args_expr),
                )

            # Remove any placeholder "pass" at the end
            if isinstance(node.body[-1], Pass):
                del node.body[-1]

    return node
def visit_AsyncFunctionDef(
    self, node: AsyncFunctionDef
) -> FunctionDef | AsyncFunctionDef | None:
    """Handle coroutine functions exactly like regular functions."""
    transformed = self.visit_FunctionDef(node)
    return transformed
def visit_Return(self, node: Return) -> Return:
    """
    This injects type checks into "return" statements.

    The returned value (``None`` for a bare ``return``) is passed through
    ``check_return_type()`` if the current function has a return annotation, is
    being instrumented, and the annotation does not refer to an ignored name.
    """
    self.generic_visit(node)

    memo = self._memo
    expected_type = memo.return_annotation
    if (
        not expected_type
        or not memo.should_instrument
        or memo.is_ignored_name(expected_type)
    ):
        return node

    checker = self._get_import("typeguard._functions", "check_return_type")
    returned_value = node.value or Constant(None)
    checked_return = Return(
        Call(
            checker,
            [
                memo.joined_path,
                returned_value,
                expected_type,
                memo.get_memo_name(),
            ],
            [],
        )
    )
    copy_location(checked_return, node)
    return checked_return
def visit_Yield(self, node: Yield) -> Yield | Call:
"""
This injects type checks into "yield" expressions, checking both the yielded
value and the value sent back to the generator, when appropriate.
"""
self._memo.has_yield_expressions = True
self.generic_visit(node)
if (
self._memo.yield_annotation
and self._memo.should_instrument
and not self._memo.is_ignored_name(self._memo.yield_annotation)
):
func_name = self._get_import("typeguard._functions", "check_yield_type")
yieldval = node.value or Constant(None)
node.value = Call(
func_name,
[
self._memo.joined_path,
yieldval,
self._memo.yield_annotation,
self._memo.get_memo_name(),
],
[],
)
if (
self._memo.send_annotation
and self._memo.should_instrument
and not self._memo.is_ignored_name(self._memo.send_annotation)
):
func_name = self._get_import("typeguard._functions", "check_send_type")
old_node = node
call_node = Call(
func_name,
[
self._memo.joined_path,
old_node,
self._memo.send_annotation,
self._memo.get_memo_name(),
],
[],
)
copy_location(call_node, old_node)
return call_node
return node
def visit_AnnAssign(self, node: AnnAssign) -> Any:
    """
    Instrument an annotated assignment (``x: int = ...``) inside a function body.

    The target name is added to the ignored names, its converted annotation is
    recorded for later assignments to the same variable, and, if a value is
    assigned, that value is wrapped in ``check_variable_assignment()``.
    """
    self.generic_visit(node)

    inside_function = isinstance(self._memo.node, (FunctionDef, AsyncFunctionDef))
    if not (inside_function and node.annotation and isinstance(node.target, Name)):
        return node

    var_name = node.target.id
    self._memo.ignored_names.add(var_name)

    # Convert a copy so the annotation left in the tree is not modified
    annotation = self._convert_annotation(deepcopy(node.annotation))
    if not annotation:
        return node

    self._memo.variable_annotations[var_name] = annotation
    if node.value:
        checker = self._get_import(
            "typeguard._functions", "check_variable_assignment"
        )
        node.value = Call(
            checker,
            [node.value, Constant(var_name), annotation, self._memo.get_memo_name()],
            [],
        )

    return node
def visit_Assign(self, node: Assign) -> Any:
    """
    Instrument a plain assignment inside a function body.

    Only name targets and tuple targets (including starred names) are considered.
    A check is injected only if at least one assigned variable was annotated earlier
    in the function body; variables without an annotation are then checked against
    ``typing.Any``. A lone variable uses ``check_variable_assignment()``, anything
    else uses ``check_multi_variable_assignment()``.
    """
    self.generic_visit(node)

    # Assignments outside of function bodies are never instrumented
    if not isinstance(self._memo.node, (FunctionDef, AsyncFunctionDef)):
        return node

    per_target: list[dict[Constant, expr | None]] = []
    found_annotation = False
    for target in node.targets:
        elements: Sequence[expr]
        if isinstance(target, Name):
            elements = [target]
        elif isinstance(target, Tuple):
            elements = target.elts
        else:
            continue

        collected: dict[Constant, expr | None] = {}
        for element in elements:
            star = ""
            if isinstance(element, Starred):
                element = element.value
                star = "*"

            if not isinstance(element, Name):
                continue

            self._memo.ignored_names.add(element.id)
            declared = self._memo.variable_annotations.get(element.id)
            if declared:
                found_annotation = True

            collected[Constant(star + element.id)] = declared if declared else None

        per_target.append(collected)

    if not found_annotation:
        return node

    # Replace missing annotations with typing.Any
    for collected in per_target:
        for key in [k for k, v in collected.items() if v is None]:
            collected[key] = self._get_import("typing", "Any")

    if len(per_target) == 1 and len(per_target[0]) == 1:
        checker = self._get_import(
            "typeguard._functions", "check_variable_assignment"
        )
        ((var_const, var_annotation),) = per_target[0].items()
        node.value = Call(
            checker,
            [node.value, var_const, var_annotation, self._memo.get_memo_name()],
            [],
        )
    elif per_target:
        checker = self._get_import(
            "typeguard._functions", "check_multi_variable_assignment"
        )
        mapping_nodes = []
        for collected in per_target:
            mapping_nodes.append(
                Dict(keys=list(collected), values=list(collected.values()))
            )

        node.value = Call(
            checker,
            [node.value, List(mapping_nodes, ctx=Load()), self._memo.get_memo_name()],
            [],
        )

    return node
def visit_NamedExpr(self, node: NamedExpr) -> Any:
    """
    Instrument an assignment expression (``a := foo()``) inside a function body.

    The target name is always added to the ignored names; the assigned value is
    wrapped in ``check_variable_assignment()`` only if the variable was annotated
    earlier in the function body.
    """
    self.generic_visit(node)

    target = node.target
    outside_function = not isinstance(
        self._memo.node, (FunctionDef, AsyncFunctionDef)
    )
    if outside_function or not isinstance(target, Name):
        return node

    self._memo.ignored_names.add(target.id)

    annotation = self._memo.variable_annotations.get(target.id)
    if annotation is not None:
        checker = self._get_import(
            "typeguard._functions", "check_variable_assignment"
        )
        node.value = Call(
            checker,
            [node.value, Constant(target.id), annotation, self._memo.get_memo_name()],
            [],
        )

    return node
def visit_AugAssign(self, node: AugAssign) -> Any:
    """
    This injects a type check into an augmented assignment expression (a += 1).

    If the target is a plain name that was annotated earlier in the function body,
    ``a <op>= value`` is replaced with a regular assignment of the form
    ``a = check_variable_assignment(operator.<func>(a, value), "a", <annotation>,
    memo)``. In every other case the node is returned unchanged.
    """
    self.generic_visit(node)

    # Only instrument function-local assignments to plain names
    if not (
        isinstance(self._memo.node, (FunctionDef, AsyncFunctionDef))
        and isinstance(node.target, Name)
    ):
        return node

    # Bail out if no matching annotation is found
    annotation = self._memo.variable_annotations.get(node.target.id)
    if annotation is None:
        return node

    # Bail out if the operator is not found (newer Python version?)
    try:
        operator_func_name = aug_assign_functions[node.op.__class__]
    except KeyError:
        return node

    operator_func = self._get_import("operator", operator_func_name)
    operator_call = Call(
        operator_func, [Name(node.target.id, ctx=Load()), node.value], []
    )
    check_call = Call(
        self._get_import("typeguard._functions", "check_variable_assignment"),
        [
            operator_call,
            Constant(node.target.id),
            annotation,
            self._memo.get_memo_name(),
        ],
        [],
    )
    replacement = Assign(targets=[node.target], value=check_call)
    # Like the replacements made in visit_Return() and visit_Yield(), the new
    # statement takes over the source position of the node it replaces, so that it
    # is not attributed to some other line once missing locations get filled in
    copy_location(replacement, node)
    return replacement
def visit_If(self, node: If) -> Any:
    """
    Keep names defined in a module-level type-checking-only block from being type
    checked.

    When the current memo is the module memo and the test is a bare name that the
    memo matches against "typing.TYPE_CHECKING", every name collected from the
    block is added to the ignored names. Only ``Name`` tests pass the guard here;
    other test expressions are left alone.
    """
    self.generic_visit(node)

    at_module_level = self._memo is self._module_memo
    if not at_module_level or not isinstance(node.test, Name):
        return node

    if self._memo.name_matches(node.test, "typing.TYPE_CHECKING"):
        names_in_block = NameCollector()
        names_in_block.visit(node)
        self._memo.ignored_names.update(names_in_block.names)

    return node
================================================
FILE: metaflow/_vendor/typeguard/_union_transformer.py
================================================
"""
Transforms lazily evaluated PEP 604 unions into typing.Unions, for compatibility with
Python versions older than 3.10.
"""
from __future__ import annotations
from ast import (
BinOp,
BitOr,
Index,
Load,
Name,
NodeTransformer,
Subscript,
fix_missing_locations,
parse,
)
from ast import Tuple as ASTTuple
from types import CodeType
from typing import Any, Dict, FrozenSet, List, Set, Tuple, Union
# Maps the names of builtin collection types (plus "Union") to their typing
# counterparts. The Python < 3.9 fallback of evaluate_forwardref() in _utils.py merges
# these into the globals used for evaluating a forward reference.
type_substitutions = {
    "dict": Dict,
    "list": List,
    "tuple": Tuple,
    "set": Set,
    "frozenset": FrozenSet,
    "Union": Union,
}
class UnionTransformer(NodeTransformer):
def __init__(self, union_name: Name | None = None):
self.union_name = union_name or Name(id="Union", ctx=Load())
def visit_BinOp(self, node: BinOp) -> Any:
self.generic_visit(node)
if isinstance(node.op, BitOr):
return Subscript(
value=self.union_name,
slice=Index(
ASTTuple(elts=[node.left, node.right], ctx=Load()), ctx=Load()
),
ctx=Load(),
)
return node
def compile_type_hint(hint: str) -> CodeType:
    """
    Compile a type hint given as a string into a code object for ``eval()``.

    Any ``X | Y`` unions in the expression are rewritten into ``Union[X, Y]``
    subscripts before compilation.

    :param hint: source text of the type hint expression
    :return: the compiled expression
    :raises SyntaxError: if ``hint`` is not a valid expression
    """
    # The hint does not come from a file, so use the conventional "<string>"
    # placeholder rather than an empty file name; otherwise syntax errors and
    # tracebacks would point at a nameless file.
    parsed = parse(hint, "<string>", "eval")
    UnionTransformer().visit(parsed)
    # The transformer creates nodes without positions, which compile() requires
    fix_missing_locations(parsed)
    return compile(parsed, "<string>", "eval", flags=0)
================================================
FILE: metaflow/_vendor/typeguard/_utils.py
================================================
from __future__ import annotations
import inspect
import sys
from importlib import import_module
from inspect import currentframe
from types import CodeType, FrameType, FunctionType
from typing import TYPE_CHECKING, Any, Callable, ForwardRef, Union, cast, final
from weakref import WeakValueDictionary
if TYPE_CHECKING:
from ._memo import TypeCheckMemo
# evaluate_forwardref() resolves a ForwardRef against the globals/locals stored in the
# memo. It goes through the private ForwardRef._evaluate() method, which is called with
# different arguments depending on the Python version, hence one definition per
# version range.
if sys.version_info >= (3, 13):
    from typing import get_args, get_origin

    def evaluate_forwardref(forwardref: ForwardRef, memo: TypeCheckMemo) -> Any:
        # Here _evaluate() is additionally given the type_params keyword argument
        return forwardref._evaluate(
            memo.globals, memo.locals, type_params=(), recursive_guard=frozenset()
        )

elif sys.version_info >= (3, 10):
    from typing import get_args, get_origin

    def evaluate_forwardref(forwardref: ForwardRef, memo: TypeCheckMemo) -> Any:
        return forwardref._evaluate(
            memo.globals, memo.locals, recursive_guard=frozenset()
        )

else:
    from metaflow._vendor.typing_extensions import get_args, get_origin

    # The recursive guard is passed positionally, and only on Python 3.9
    evaluate_extra_args: tuple[frozenset[Any], ...] = (
        (frozenset(),) if sys.version_info >= (3, 9) else ()
    )

    def evaluate_forwardref(forwardref: ForwardRef, memo: TypeCheckMemo) -> Any:
        from ._union_transformer import compile_type_hint, type_substitutions

        # Unless already evaluated, swap in code compiled by compile_type_hint(),
        # which rewrites "X | Y" unions into Union[X, Y]
        if not forwardref.__forward_evaluated__:
            forwardref.__forward_code__ = compile_type_hint(forwardref.__forward_arg__)

        try:
            return forwardref._evaluate(memo.globals, memo.locals, *evaluate_extra_args)
        except NameError:
            # NOTE(review): this branch only exists on Python < 3.10, so the version
            # check below always passes here and the trailing "raise" is not reached
            if sys.version_info < (3, 10):
                # Try again, with the type substitutions (list -> List etc.) in place
                new_globals = memo.globals.copy()
                new_globals.setdefault("Union", Union)
                if sys.version_info < (3, 9):
                    new_globals.update(type_substitutions)

                # Falls back to the patched globals as locals if the memo has no
                # (or empty) locals
                return forwardref._evaluate(
                    new_globals, memo.locals or new_globals, *evaluate_extra_args
                )

            raise
# Weak-valued lookup table from code objects to function objects: an entry goes away
# once its function is garbage collected.
# NOTE(review): nothing in this part of the file reads or fills it -- check its users
# for the exact purpose.
_functions_map: WeakValueDictionary[CodeType, FunctionType] = WeakValueDictionary()
def get_type_name(type_: Any) -> str:
    """
    Return a display name for the given type or type annotation.

    The base name is the first string found among the ``__name__``, ``_name`` and
    ``__forward_arg__`` attributes, falling back to the ``_name`` of the origin type
    and then to the class name of the object with underscores stripped from both
    ends. Type arguments are appended in brackets (as reprs for ``Literal``, as
    type names otherwise), and the module name is prepended unless it is
    ``typing``, ``typing_extensions`` or ``builtins``.

    :return: the display name, or ``"(unknown)"`` if no string name could be found
    """
    string_attrs = (
        value
        for value in (
            getattr(type_, attrname, None)
            for attrname in ("__name__", "_name", "__forward_arg__")
        )
        if isinstance(value, str)
    )
    name = next(string_attrs, None)
    if name is None:
        fallback = getattr(get_origin(type_), "_name", None)
        if fallback is None:
            fallback = type_.__class__.__name__.strip("_")

        if not isinstance(fallback, str):
            return "(unknown)"

        name = fallback

    args = get_args(type_)
    if args:
        # Literal arguments are values, not types, so show them as they were written
        render = repr if name == "Literal" else get_type_name
        name = f"{name}[{', '.join(render(arg) for arg in args)}]"

    module = getattr(type_, "__module__", None)
    if module and module not in ("typing", "typing_extensions", "builtins"):
        name = module + "." + name

    return name
def qualified_name(obj: Any, *, add_class_prefix: bool = False) -> str:
    """
    Return the qualified name (e.g. package.module.Type) of the given object's type,
    or of the object itself if it is a class.

    The module name is left out for types from the ``typing`` and ``builtins``
    modules, and ``None`` is rendered as ``"None"``.

    :param add_class_prefix: if ``True``, prefix the name with ``"class "`` when
        ``obj`` is itself a class
    """
    if obj is None:
        return "None"

    if inspect.isclass(obj):
        type_, prefix = obj, ("class " if add_class_prefix else "")
    else:
        type_, prefix = type(obj), ""

    module, qualname = type_.__module__, type_.__qualname__
    if module not in ("typing", "builtins"):
        qualname = f"{module}.{qualname}"

    return prefix + qualname
def function_name(func: Callable[..., Any]) -> str:
    """
    Return the qualified name of the given callable.

    The name is ``<module>.<qualname>``. The module part is left out for callables
    from ``builtins`` and for callables without a usable ``__module__``; the repr of
    the callable stands in for a missing ``__qualname__``.
    """
    # __module__ is not only missing on some callables, it can also be None (e.g. on
    # bound methods of builtin types such as [].append), which must not be
    # concatenated with a string
    module = getattr(func, "__module__", None) or ""
    prefix = (module + ".") if module not in ("builtins", "") else ""
    # For partial functions and objects with __call__ defined, __qualname__ does not
    # exist
    return prefix + getattr(func, "__qualname__", repr(func))
def resolve_reference(reference: str) -> Any:
    """
    Import and return the object that a ``module:varname`` reference points to.

    The part after the colon may be a dotted attribute path.

    :raises ValueError: if either side of the colon is empty or the colon is missing
    """
    modulename, _, varname = reference.partition(":")
    if not (modulename and varname):
        raise ValueError(f"{reference!r} is not a module:varname reference")

    target = import_module(modulename)
    for attr in varname.split("."):
        target = getattr(target, attr)

    return target
def is_method_of(obj: object, cls: type) -> bool:
    """
    Tell whether ``obj`` is a plain function defined inside the body of ``cls``.

    This holds when both come from the same module and the qualified name of the
    function starts with that of the class followed by a dot.
    """
    if not inspect.isfunction(obj):
        return False

    same_module = obj.__module__ == cls.__module__
    return same_module and obj.__qualname__.startswith(cls.__qualname__ + ".")
def get_stacklevel() -> int:
    """
    Return the number of frames to skip so that the resulting stack level refers to
    the first caller outside of this package.

    Starting from the caller of this function, frames are counted for as long as
    the ``__name__`` of their module marks them as internal.
    """
    # Besides the upstream package name, recognize the package this module actually
    # lives in: when vendored, the modules are named e.g.
    # "metaflow._vendor.typeguard.*", which never starts with "typeguard."
    prefixes = ["typeguard."]
    package = __name__.rpartition(".")[0]
    if package:
        prefixes.append(package + ".")

    internal = tuple(prefixes)
    level = 1
    frame = cast(FrameType, currentframe()).f_back
    while frame and frame.f_globals.get("__name__", "").startswith(internal):
        level += 1
        frame = frame.f_back

    return level
@final
class Unset:
    """
    Type of the ``unset`` singleton below.

    NOTE(review): presumably used as a "no value given" marker where ``None`` is a
    legitimate value -- confirm against its users.
    """

    __slots__ = ()

    def __repr__(self) -> str:
        # An empty repr would make the marker invisible wherever it is displayed
        return "<unset>"


unset = Unset()
================================================
FILE: metaflow/_vendor/typeguard/py.typed
================================================
================================================
FILE: metaflow/_vendor/typeguard.LICENSE
================================================
This is the MIT license: http://www.opensource.org/licenses/mit-license.php
Copyright (c) Alex Grönholm
Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or
substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
================================================
FILE: metaflow/_vendor/typing_extensions.LICENSE
================================================
A. HISTORY OF THE SOFTWARE
==========================
Python was created in the early 1990s by Guido van Rossum at Stichting
Mathematisch Centrum (CWI, see https://www.cwi.nl) in the Netherlands
as a successor of a language called ABC. Guido remains Python's
principal author, although it includes many contributions from others.
In 1995, Guido continued his work on Python at the Corporation for
National Research Initiatives (CNRI, see https://www.cnri.reston.va.us)
in Reston, Virginia where he released several versions of the
software.
In May 2000, Guido and the Python core development team moved to
BeOpen.com to form the BeOpen PythonLabs team. In October of the same
year, the PythonLabs team moved to Digital Creations, which became
Zope Corporation. In 2001, the Python Software Foundation (PSF, see
https://www.python.org/psf/) was formed, a non-profit organization
created specifically to own Python-related Intellectual Property.
Zope Corporation was a sponsoring member of the PSF.
All Python releases are Open Source (see https://opensource.org for
the Open Source Definition). Historically, most, but not all, Python
releases have also been GPL-compatible; the table below summarizes
the various releases.
Release Derived Year Owner GPL-
from compatible? (1)
0.9.0 thru 1.2 1991-1995 CWI yes
1.3 thru 1.5.2 1.2 1995-1999 CNRI yes
1.6 1.5.2 2000 CNRI no
2.0 1.6 2000 BeOpen.com no
1.6.1 1.6 2001 CNRI yes (2)
2.1 2.0+1.6.1 2001 PSF no
2.0.1 2.0+1.6.1 2001 PSF yes
2.1.1 2.1+2.0.1 2001 PSF yes
2.1.2 2.1.1 2002 PSF yes
2.1.3 2.1.2 2002 PSF yes
2.2 and above 2.1.1 2001-now PSF yes
Footnotes:
(1) GPL-compatible doesn't mean that we're distributing Python under
the GPL. All Python licenses, unlike the GPL, let you distribute
a modified version without making your changes open source. The
GPL-compatible licenses make it possible to combine Python with
other software that is released under the GPL; the others don't.
(2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
because its license has a choice of law clause. According to
CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
is "not incompatible" with the GPL.
Thanks to the many outside volunteers who have worked under Guido's
direction to make these releases possible.
B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
===============================================================
Python software and documentation are licensed under the
Python Software Foundation License Version 2.
Starting with Python 3.8.6, examples, recipes, and other code in
the documentation are dual licensed under the PSF License Version 2
and the Zero-Clause BSD license.
Some software incorporated into Python is under different licenses.
The licenses are listed with code falling under that license.
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
--------------------------------------------
1. This LICENSE AGREEMENT is between the Python Software Foundation
("PSF"), and the Individual or Organization ("Licensee") accessing and
otherwise using this software ("Python") in source or binary form and
its associated documentation.
2. Subject to the terms and conditions of this License Agreement, PSF hereby
grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
analyze, test, perform and/or display publicly, prepare derivative works,
distribute, and otherwise use Python alone or in any derivative version,
provided, however, that PSF's License Agreement and PSF's notice of copyright,
i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation;
All Rights Reserved" are retained in Python alone or in any derivative version
prepared by Licensee.
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python.
4. PSF is making Python available to Licensee on an "AS IS"
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between PSF and
Licensee. This License Agreement does not grant permission to use PSF
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
-------------------------------------------
BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
Individual or Organization ("Licensee") accessing and otherwise using
this software in source or binary form and its associated
documentation ("the Software").
2. Subject to the terms and conditions of this BeOpen Python License
Agreement, BeOpen hereby grants Licensee a non-exclusive,
royalty-free, world-wide license to reproduce, analyze, test, perform
and/or display publicly, prepare derivative works, distribute, and
otherwise use the Software alone or in any derivative version,
provided, however, that the BeOpen Python License is retained in the
Software, alone or in any derivative version prepared by Licensee.
3. BeOpen is making the Software available to Licensee on an "AS IS"
basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
5. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
6. This License Agreement shall be governed by and interpreted in all
respects by the law of the State of California, excluding conflict of
law provisions. Nothing in this License Agreement shall be deemed to
create any relationship of agency, partnership, or joint venture
between BeOpen and Licensee. This License Agreement does not grant
permission to use BeOpen trademarks or trade names in a trademark
sense to endorse or promote products or services of Licensee, or any
third party. As an exception, the "BeOpen Python" logos available at
http://www.pythonlabs.com/logos.html may be used according to the
permissions granted on that web page.
7. By copying, installing or otherwise using the software, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
---------------------------------------
1. This LICENSE AGREEMENT is between the Corporation for National
Research Initiatives, having an office at 1895 Preston White Drive,
Reston, VA 20191 ("CNRI"), and the Individual or Organization
("Licensee") accessing and otherwise using Python 1.6.1 software in
source or binary form and its associated documentation.
2. Subject to the terms and conditions of this License Agreement, CNRI
hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use Python 1.6.1
alone or in any derivative version, provided, however, that CNRI's
License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
1995-2001 Corporation for National Research Initiatives; All Rights
Reserved" are retained in Python 1.6.1 alone or in any derivative
version prepared by Licensee. Alternately, in lieu of CNRI's License
Agreement, Licensee may substitute the following text (omitting the
quotes): "Python 1.6.1 is made available subject to the terms and
conditions in CNRI's License Agreement. This Agreement together with
Python 1.6.1 may be located on the internet using the following
unique, persistent identifier (known as a handle): 1895.22/1013. This
Agreement may also be obtained from a proxy server on the internet
using the following URL: http://hdl.handle.net/1895.22/1013".
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python 1.6.1 or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python 1.6.1.
4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. This License Agreement shall be governed by the federal
intellectual property law of the United States, including without
limitation the federal copyright law, and, to the extent such
U.S. federal law does not apply, by the law of the Commonwealth of
Virginia, excluding Virginia's conflict of law provisions.
Notwithstanding the foregoing, with regard to derivative works based
on Python 1.6.1 that incorporate non-separable material that was
previously distributed under the GNU General Public License (GPL), the
law of the Commonwealth of Virginia shall govern this License
Agreement only as to issues arising under or with respect to
Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
License Agreement shall be deemed to create any relationship of
agency, partnership, or joint venture between CNRI and Licensee. This
License Agreement does not grant permission to use CNRI trademarks or
trade name in a trademark sense to endorse or promote products or
services of Licensee, or any third party.
8. By clicking on the "ACCEPT" button where indicated, or by copying,
installing or otherwise using Python 1.6.1, Licensee agrees to be
bound by the terms and conditions of this License Agreement.
ACCEPT
CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
--------------------------------------------------
Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
The Netherlands. All rights reserved.
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.
STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
ZERO-CLAUSE BSD LICENSE FOR CODE IN THE PYTHON DOCUMENTATION
----------------------------------------------------------------------
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
================================================
FILE: metaflow/_vendor/typing_extensions.py
================================================
import abc
import collections
import collections.abc
import contextlib
import functools
import inspect
import operator
import sys
import types as _types
import typing
import warnings
__all__ = [
# Super-special typing primitives.
'Any',
'ClassVar',
'Concatenate',
'Final',
'LiteralString',
'ParamSpec',
'ParamSpecArgs',
'ParamSpecKwargs',
'Self',
'Type',
'TypeVar',
'TypeVarTuple',
'Unpack',
# ABCs (from collections.abc).
'Awaitable',
'AsyncIterator',
'AsyncIterable',
'Coroutine',
'AsyncGenerator',
'AsyncContextManager',
'Buffer',
'ChainMap',
# Concrete collection types.
'ContextManager',
'Counter',
'Deque',
'DefaultDict',
'NamedTuple',
'OrderedDict',
'TypedDict',
# Structural checks, a.k.a. protocols.
'SupportsAbs',
'SupportsBytes',
'SupportsComplex',
'SupportsFloat',
'SupportsIndex',
'SupportsInt',
'SupportsRound',
# One-off things.
'Annotated',
'assert_never',
'assert_type',
'clear_overloads',
'dataclass_transform',
'deprecated',
'Doc',
'get_overloads',
'final',
'get_args',
'get_origin',
'get_original_bases',
'get_protocol_members',
'get_type_hints',
'IntVar',
'is_protocol',
'is_typeddict',
'Literal',
'NewType',
'overload',
'override',
'Protocol',
'reveal_type',
'runtime',
'runtime_checkable',
'Text',
'TypeAlias',
'TypeAliasType',
'TypeGuard',
'TypeIs',
'TYPE_CHECKING',
'Never',
'NoReturn',
'ReadOnly',
'Required',
'NotRequired',
# Pure aliases, have always been in typing
'AbstractSet',
'AnyStr',
'BinaryIO',
'Callable',
'Collection',
'Container',
'Dict',
'ForwardRef',
'FrozenSet',
'Generator',
'Generic',
'Hashable',
'IO',
'ItemsView',
'Iterable',
'Iterator',
'KeysView',
'List',
'Mapping',
'MappingView',
'Match',
'MutableMapping',
'MutableSequence',
'MutableSet',
'NoDefault',
'Optional',
'Pattern',
'Reversible',
'Sequence',
'Set',
'Sized',
'TextIO',
'Tuple',
'Union',
'ValuesView',
'cast',
'no_type_check',
'no_type_check_decorator',
]
# for backward compatibility
PEP_560 = True
GenericMeta = type
# True when running on Python 3.13.0 beta or newer.
# NOTE(review): judging by the name, this gates fallbacks for PEP 696 support --
# confirm against its users.
_PEP_696_IMPLEMENTED = sys.version_info >= (3, 13, 0, "beta")
# The functions below are modified copies of typing internal helpers.
# They are needed by _ProtocolMeta and they provide support for PEP 646.
class _Sentinel:
def __repr__(self):
return ""
_marker = _Sentinel()
if sys.version_info >= (3, 10):
def _should_collect_from_parameters(t):
return isinstance(
t, (typing._GenericAlias, _types.GenericAlias, _types.UnionType)
)
elif sys.version_info >= (3, 9):
def _should_collect_from_parameters(t):
return isinstance(t, (typing._GenericAlias, _types.GenericAlias))
else:
def _should_collect_from_parameters(t):
return isinstance(t, typing._GenericAlias) and not t._special
# Re-exported unchanged from typing.
NoReturn = typing.NoReturn
# Some unconstrained type variables. These are used by the container types.
# (These are not for export.)
T = typing.TypeVar('T')  # Any type.
KT = typing.TypeVar('KT')  # Key type.
VT = typing.TypeVar('VT')  # Value type.
T_co = typing.TypeVar('T_co', covariant=True)  # Any type covariant containers.
T_contra = typing.TypeVar('T_contra', contravariant=True)  # Ditto contravariant.
if sys.version_info >= (3, 11):
    from typing import Any
else:

    class _AnyMeta(type):
        """Metaclass giving ``Any`` its repr and forbidding isinstance() checks."""

        def __instancecheck__(self, obj):
            if self is not Any:
                return super().__instancecheck__(obj)
            raise TypeError("typing_extensions.Any cannot be used with isinstance()")

        def __repr__(self):
            return "typing_extensions.Any" if self is Any else super().__repr__()

    class Any(metaclass=_AnyMeta):
        """Special type indicating an unconstrained type.

        - Any is compatible with every type.
        - Any assumed to have all methods.
        - All values assumed to be instances of Any.

        Note that all the above statements are true from the point of view of
        static type checkers. At runtime, Any should not be used with instance
        checks.
        """

        def __new__(cls, *args, **kwargs):
            # Only subclasses may be instantiated
            if cls is not Any:
                return super().__new__(cls, *args, **kwargs)
            raise TypeError("Any cannot be instantiated")
ClassVar = typing.ClassVar
class _ExtensionsSpecialForm(typing._SpecialForm, _root=True):
def __repr__(self):
return 'typing_extensions.' + self._name
Final = typing.Final
if sys.version_info >= (3, 11):
    final = typing.final
else:
    # @final exists in 3.8+, but we backport it for all versions
    # before 3.11 to keep support for the __final__ attribute.
    # See https://bugs.python.org/issue46342
    def final(f):
        """Mark a method as not overridable, or a class as not subclassable.

        This is a hint for type checkers only, for example:

            class Base:
                @final
                def done(self) -> None:
                    ...
            class Sub(Base):
                def done(self) -> None:  # Error reported by type checker
                    ...

            @final
            class Leaf:
                ...
            class Other(Leaf):  # Error reported by type checker
                ...

        Nothing is enforced at runtime. The decorator sets the ``__final__``
        attribute of the decorated object to ``True`` to allow runtime
        introspection, and returns the object itself.
        """
        # The attribute is silently skipped if it is not writable: AttributeError
        # happens if the object has __slots__ or a read-only property, TypeError if
        # it's a builtin class.
        with contextlib.suppress(AttributeError, TypeError):
            f.__final__ = True
        return f
def IntVar(name):
    """Create a plain, unconstrained ``typing.TypeVar`` called ``name``."""
    type_var = typing.TypeVar(name)
    return type_var
# A Literal bug was fixed in 3.11.0, 3.10.1 and 3.9.8
# Everything below 3.10.1 gets the backport defined here, which flattens nested
# Literals and removes duplicate (value, type) pairs.
if sys.version_info >= (3, 10, 1):
    Literal = typing.Literal
else:
    def _flatten_literal_params(parameters):
        """An internal helper for Literal creation: flatten Literals among parameters"""
        params = []
        for p in parameters:
            if isinstance(p, _LiteralGenericAlias):
                params.extend(p.__args__)
            else:
                params.append(p)
        return tuple(params)

    def _value_and_type_iter(params):
        # Pair every value with its type, so that values that compare equal but have
        # different types stay distinct
        for p in params:
            yield p, type(p)

    class _LiteralGenericAlias(typing._GenericAlias, _root=True):
        # Equality and hashing ignore the order and repetition of the arguments, but
        # take the argument types into account
        def __eq__(self, other):
            if not isinstance(other, _LiteralGenericAlias):
                return NotImplemented
            these_args_deduped = set(_value_and_type_iter(self.__args__))
            other_args_deduped = set(_value_and_type_iter(other.__args__))
            return these_args_deduped == other_args_deduped

        def __hash__(self):
            return hash(frozenset(_value_and_type_iter(self.__args__)))

    class _LiteralForm(_ExtensionsSpecialForm, _root=True):
        def __init__(self, doc: str):
            self._name = 'Literal'
            self._doc = self.__doc__ = doc

        def __getitem__(self, parameters):
            # Normalize to a tuple, then merge the arguments of nested Literals
            if not isinstance(parameters, tuple):
                parameters = (parameters,)

            parameters = _flatten_literal_params(parameters)

            val_type_pairs = list(_value_and_type_iter(parameters))
            try:
                deduped_pairs = set(val_type_pairs)
            except TypeError:
                # unhashable parameters
                pass
            else:
                # similar logic to typing._deduplicate on Python 3.9+
                if len(deduped_pairs) < len(val_type_pairs):
                    # Keep the first occurrence of every pair, in the original order
                    new_parameters = []
                    for pair in val_type_pairs:
                        if pair in deduped_pairs:
                            new_parameters.append(pair[0])
                            deduped_pairs.remove(pair)
                    assert not deduped_pairs, deduped_pairs
                    parameters = tuple(new_parameters)

            return _LiteralGenericAlias(self, parameters)

    Literal = _LiteralForm(doc="""\
A type that can be used to indicate to type checkers
that the corresponding value has a value literally equivalent
to the provided parameter. For example:
var: Literal[4] = 4
The type checker understands that 'var' is literally equal to
the value 4 and no other value.
Literal[...] cannot be subclassed. There is no runtime
checking verifying that the parameter is actually a value
instead of a type.""")
_overload_dummy = typing._overload_dummy
if hasattr(typing, "get_overloads"): # 3.11+
overload = typing.overload
get_overloads = typing.get_overloads
clear_overloads = typing.clear_overloads
else:
# {module: {qualname: {firstlineno: func}}}
_overload_registry = collections.defaultdict(
functools.partial(collections.defaultdict, dict)
)
def overload(func):
"""Decorator for overloaded functions/methods.
In a stub file, place two or more stub definitions for the same
function in a row, each decorated with @overload. For example:
@overload
def utf8(value: None) -> None: ...
@overload
def utf8(value: bytes) -> bytes: ...
@overload
def utf8(value: str) -> bytes: ...
In a non-stub file (i.e. a regular .py file), do the same but
follow it with an implementation. The implementation should *not*
be decorated with @overload. For example:
@overload
def utf8(value: None) -> None: ...
@overload
def utf8(value: bytes) -> bytes: ...
@overload
def utf8(value: str) -> bytes: ...
def utf8(value):
# implementation goes here
The overloads for a function can be retrieved at runtime using the
get_overloads() function.
"""
# classmethod and staticmethod
f = getattr(func, "__func__", func)
try:
_overload_registry[f.__module__][f.__qualname__][
f.__code__.co_firstlineno
] = func
except AttributeError:
# Not a normal function; ignore.
pass
return _overload_dummy
def get_overloads(func):
"""Return all defined overloads for *func* as a sequence."""
# classmethod and staticmethod
f = getattr(func, "__func__", func)
if f.__module__ not in _overload_registry:
return []
mod_dict = _overload_registry[f.__module__]
if f.__qualname__ not in mod_dict:
return []
return list(mod_dict[f.__qualname__].values())
def clear_overloads():
"""Clear all overloads in the registry."""
_overload_registry.clear()
# Type is not a real generic class. Don't use outside annotations.
from typing import Type

# Various ABCs mimicking those in collections.abc, plus a few names that
# are simply re-exported for completeness.
from typing import (
    Awaitable,
    Coroutine,
    AsyncIterable,
    AsyncIterator,
    Deque,
    DefaultDict,
    OrderedDict,
    Counter,
    ChainMap,
    Text,
    TYPE_CHECKING,
)
if sys.version_info >= (3, 13, 0, "beta"):
    from typing import AsyncContextManager, AsyncGenerator, ContextManager, Generator
else:
    def _is_dunder(attr):
        """Return True if *attr* both starts and ends with two underscores."""
        return attr.startswith('__') and attr.endswith('__')

    # Python <3.9 doesn't have typing._SpecialGenericAlias
    _special_generic_alias_base = getattr(
        typing, "_SpecialGenericAlias", typing._GenericAlias
    )

    class _SpecialGenericAlias(_special_generic_alias_base, _root=True):
        # Special generic alias that can fill in trailing type arguments
        # from a tuple of defaults when fewer than `nparams` are supplied.
        def __init__(self, origin, nparams, *, inst=True, name=None, defaults=()):
            if _special_generic_alias_base is not typing._GenericAlias:
                # Python >= 3.9
                super().__init__(origin, nparams, inst=inst, name=name)
            else:
                # Python <3.9
                self.__origin__ = origin
                self._nparams = nparams
                super().__init__(origin, nparams, special=True, inst=inst, name=name)
            self._defaults = defaults

        def __setattr__(self, attr, val):
            # Dunders and the alias's own bookkeeping fields live on the
            # alias; anything else is forwarded to the origin class.
            own_attrs = {'_name', '_inst', '_nparams', '_defaults'}
            if _special_generic_alias_base is typing._GenericAlias:
                # Python <3.9
                own_attrs.add("__origin__")
            if _is_dunder(attr) or attr in own_attrs:
                object.__setattr__(self, attr, val)
            else:
                setattr(self.__origin__, attr, val)

        @typing._tp_cache
        def __getitem__(self, params):
            if not isinstance(params, tuple):
                params = (params,)
            msg = "Parameters to generic types must be types."
            params = tuple(typing._type_check(p, msg) for p in params)

            supplied = len(params)
            if (
                self._defaults
                and supplied < self._nparams
                and supplied + len(self._defaults) >= self._nparams
            ):
                # Pad with the trailing defaults that were not supplied.
                params = (*params, *self._defaults[supplied - self._nparams:])

            actual_len = len(params)
            if actual_len == self._nparams:
                return self.copy_with(params)

            if not self._nparams:
                raise TypeError(f"{self} is not a generic class")
            if self._defaults:
                expected = f"at least {self._nparams - len(self._defaults)}"
            else:
                expected = str(self._nparams)
            raise TypeError(
                f"Too {'many' if actual_len > self._nparams else 'few'}"
                f" arguments for {self};"
                f" actual {actual_len}, expected {expected}"
            )

    _NoneType = type(None)
    Generator = _SpecialGenericAlias(
        collections.abc.Generator, 3, defaults=(_NoneType, _NoneType)
    )
    AsyncGenerator = _SpecialGenericAlias(
        collections.abc.AsyncGenerator, 2, defaults=(_NoneType,)
    )
    ContextManager = _SpecialGenericAlias(
        contextlib.AbstractContextManager,
        2,
        name="ContextManager",
        defaults=(typing.Optional[bool],)
    )
    AsyncContextManager = _SpecialGenericAlias(
        contextlib.AbstractAsyncContextManager,
        2,
        name="AsyncContextManager",
        defaults=(typing.Optional[bool],)
    )
_PROTO_ALLOWLIST = {
'collections.abc': [
'Callable', 'Awaitable', 'Iterable', 'Iterator', 'AsyncIterable',
'Hashable', 'Sized', 'Container', 'Collection', 'Reversible', 'Buffer',
],
'contextlib': ['AbstractContextManager', 'AbstractAsyncContextManager'],
'typing_extensions': ['Buffer'],
}
_EXCLUDED_ATTRS = frozenset(typing.EXCLUDED_ATTRIBUTES) | {
"__match_args__", "__protocol_attrs__", "__non_callable_proto_members__",
"__final__",
}
def _get_protocol_attrs(cls):
attrs = set()
for base in cls.__mro__[:-1]: # without object
if base.__name__ in {'Protocol', 'Generic'}:
continue
annotations = getattr(base, '__annotations__', {})
for attr in (*base.__dict__, *annotations):
if (not attr.startswith('_abc_') and attr not in _EXCLUDED_ATTRS):
attrs.add(attr)
return attrs
def _caller(depth=2):
try:
return sys._getframe(depth).f_globals.get('__name__', '__main__')
except (AttributeError, ValueError): # For platforms without _getframe()
return None
# The `__match_args__` attribute stopped being treated as a protocol member
# in Python 3.13. typing.Protocol is reused from 3.13 on; for older versions
# the branch below backports that change.
if sys.version_info >= (3, 13):
Protocol = typing.Protocol
else:
def _allow_reckless_class_checks(depth=3):
    """Allow instance and class checks for special stdlib modules.

    The abc and functools modules indiscriminately call isinstance() and
    issubclass() on the whole MRO of a user class, which may contain protocols.
    """
    # `_caller` yields None when the calling frame cannot be inspected;
    # that case is treated as allowed as well.
    calling_module = _caller(depth)
    return calling_module in {'abc', 'functools', None}
def _no_init(self, *args, **kwargs):
if type(self)._is_protocol:
raise TypeError('Protocols cannot be instantiated')
def _type_check_issubclass_arg_1(arg):
"""Raise TypeError if `arg` is not an instance of `type`
in `issubclass(arg, )`.
In most cases, this is verified by type.__subclasscheck__.
Checking it again unnecessarily would slow down issubclass() checks,
so, we don't perform this check unless we absolutely have to.
For various error paths, however,
we want to ensure that *this* error message is shown to the user
where relevant, rather than a typing.py-specific error message.
"""
if not isinstance(arg, type):
# Same error message as for issubclass(1, int).
raise TypeError('issubclass() arg 1 must be a class')
# Inheriting from typing._ProtocolMeta isn't actually desirable,
# but is necessary to allow typing.Protocol and typing_extensions.Protocol
# to mix without getting TypeErrors about "metaclass conflict"
class _ProtocolMeta(type(typing.Protocol)):
# This metaclass is somewhat unfortunate,
# but is necessary for several reasons...
#
# NOTE: DO NOT call super() in any methods in this class
# That would call the methods on typing._ProtocolMeta on Python 3.8-3.11
# and those are slow
#
# Class creation. Nothing is validated for a class named "Protocol" with
# fewer than two bases. Otherwise, when Protocol or typing.Protocol appears
# directly in `bases`, every base must be object, typing.Generic, one of the
# two Protocol roots, a class listed in _PROTO_ALLOWLIST for its module, or
# something is_protocol() accepts; any other base raises TypeError.
def __new__(mcls, name, bases, namespace, **kwargs):
if name == "Protocol" and len(bases) < 2:
pass
elif {Protocol, typing.Protocol} & set(bases):
for base in bases:
if not (
base in {object, typing.Generic, Protocol, typing.Protocol}
or base.__name__ in _PROTO_ALLOWLIST.get(base.__module__, [])
or is_protocol(base)
):
raise TypeError(
f"Protocols can only inherit from other protocols, "
f"got {base!r}"
)
return abc.ABCMeta.__new__(mcls, name, bases, namespace, **kwargs)
# Class initialisation: protocol classes get their member names cached in
# `__protocol_attrs__`.
def __init__(cls, *args, **kwargs):
abc.ABCMeta.__init__(cls, *args, **kwargs)
if getattr(cls, "_is_protocol", False):
cls.__protocol_attrs__ = _get_protocol_attrs(cls)
# issubclass() support. For protocol classes (unless the call is allowed by
# _allow_reckless_class_checks) a TypeError is raised when the protocol is
# not runtime-checkable, or when it has non-method members and still uses
# the default _proto_hook as its __subclasshook__.
def __subclasscheck__(cls, other):
if cls is Protocol:
return type.__subclasscheck__(cls, other)
if (
getattr(cls, '_is_protocol', False)
and not _allow_reckless_class_checks()
):
if not getattr(cls, '_is_runtime_protocol', False):
_type_check_issubclass_arg_1(other)
raise TypeError(
"Instance and class checks can only be used with "
"@runtime_checkable protocols"
)
if (
# this attribute is set by @runtime_checkable:
cls.__non_callable_proto_members__
and cls.__dict__.get("__subclasshook__") is _proto_hook
):
_type_check_issubclass_arg_1(other)
non_method_attrs = sorted(cls.__non_callable_proto_members__)
raise TypeError(
"Protocols with non-method members don't support issubclass()."
f" Non-method members: {str(non_method_attrs)[1:-1]}."
)
return abc.ABCMeta.__subclasscheck__(cls, other)
# isinstance() support. After the regular ABC check fails, the instance is
# accepted structurally when every name in `__protocol_attrs__` can be found
# statically on it, and no member found as None is a method member.
def __instancecheck__(cls, instance):
# We need this method for situations where attributes are
# assigned in __init__.
if cls is Protocol:
return type.__instancecheck__(cls, instance)
if not getattr(cls, "_is_protocol", False):
# i.e., it's a concrete subclass of a protocol
return abc.ABCMeta.__instancecheck__(cls, instance)
if (
not getattr(cls, '_is_runtime_protocol', False) and
not _allow_reckless_class_checks()
):
raise TypeError("Instance and class checks can only be used with"
" @runtime_checkable protocols")
if abc.ABCMeta.__instancecheck__(cls, instance):
return True
for attr in cls.__protocol_attrs__:
try:
val = inspect.getattr_static(instance, attr)
except AttributeError:
break
# this attribute is set by @runtime_checkable:
if val is None and attr not in cls.__non_callable_proto_members__:
break
else:
# The loop finished without a break: every member is present.
return True
return False
def __eq__(cls, other):
# Hack so that typing.Generic.__class_getitem__
# treats typing_extensions.Protocol
# as equivalent to typing.Protocol
if abc.ABCMeta.__eq__(cls, other) is True:
return True
return cls is Protocol and other is typing.Protocol
# This has to be defined, or the abc-module cache
# complains about classes with this metaclass being unhashable,
# if we define only __eq__!
def __hash__(cls) -> int:
return type.__hash__(cls)
@classmethod
def _proto_hook(cls, other):
if not cls.__dict__.get('_is_protocol', False):
return NotImplemented
for attr in cls.__protocol_attrs__:
for base in other.__mro__:
# Check if the members appears in the class dictionary...
if attr in base.__dict__:
if base.__dict__[attr] is None:
return NotImplemented
break
# ...or in annotations, if it is a sub-protocol.
annotations = getattr(base, '__annotations__', {})
if (
isinstance(annotations, collections.abc.Mapping)
and attr in annotations
and is_protocol(other)
):
break
else:
return NotImplemented
return True
class Protocol(typing.Generic, metaclass=_ProtocolMeta):
    __doc__ = typing.Protocol.__doc__
    __slots__ = ()
    _is_protocol = True
    _is_runtime_protocol = False

    def __init_subclass__(cls, *args, **kwargs):
        super().__init_subclass__(*args, **kwargs)

        # Determine if this is a protocol or a concrete subclass: a truthy
        # `_is_protocol` in the class body is kept, otherwise the class is a
        # protocol only when Protocol itself is one of its direct bases.
        declared_protocol = cls.__dict__.get('_is_protocol', False)
        if not declared_protocol:
            cls._is_protocol = any(base is Protocol for base in cls.__bases__)

        # Set the protocol subclass hook, unless the class body supplies
        # its own.
        if '__subclasshook__' not in cls.__dict__:
            cls.__subclasshook__ = _proto_hook

        # Prohibit instantiation for protocol classes
        if cls._is_protocol and cls.__init__ is Protocol.__init__:
            cls.__init__ = _no_init
if sys.version_info >= (3, 13):
    runtime_checkable = typing.runtime_checkable
else:
    def runtime_checkable(cls):
        """Mark a protocol class as a runtime protocol.

        Such protocol can be used with isinstance() and issubclass().
        Raise TypeError if applied to a non-protocol class.
        This allows a simple-minded structural check very similar to
        one trick ponies in collections.abc such as Iterable.

        For example::

            @runtime_checkable
            class Closable(Protocol):
                def close(self): ...

            assert isinstance(open('/some/file'), Closable)

        Warning: this will check only the presence of the required methods,
        not their type signatures!
        """
        if not (issubclass(cls, typing.Generic) and getattr(cls, '_is_protocol', False)):
            raise TypeError(f'@runtime_checkable can be only applied to protocol classes,'
                            f' got {cls!r}')
        cls._is_runtime_protocol = True

        # typing.Protocol classes on <=3.11 break if we execute this block,
        # because typing.Protocol classes on <=3.11 don't have a
        # `__protocol_attrs__` attribute, and this block relies on the
        # `__protocol_attrs__` attribute. Meanwhile, typing.Protocol classes on 3.12.2+
        # break if we *don't* execute this block, because *they* assume that all
        # protocol classes have a `__non_callable_proto_members__` attribute
        # (which this block sets)
        if isinstance(cls, _ProtocolMeta) or sys.version_info >= (3, 12, 2):
            # PEP 544 prohibits using issubclass()
            # with protocols that have non-method members.
            # See gh-113320 for why we compute this attribute here,
            # rather than in `_ProtocolMeta.__init__`
            non_callable = set()
            # Published before it is filled, so the attribute exists even if
            # inspecting a member fails below.
            cls.__non_callable_proto_members__ = non_callable
            for attr in cls.__protocol_attrs__:
                try:
                    is_callable = callable(getattr(cls, attr, None))
                except Exception as e:
                    raise TypeError(
                        f"Failed to determine whether protocol member {attr!r} "
                        "is a method member"
                    ) from e
                if not is_callable:
                    non_callable.add(attr)

        return cls


# The "runtime" alias exists for backwards compatibility.
runtime = runtime_checkable
# On Python 3.8-3.11 the runtime-checkable protocols defined here are faster
# than the typing ones, so typing's are only reused from 3.12 on.
if sys.version_info >= (3, 12):
    SupportsInt = typing.SupportsInt
    SupportsFloat = typing.SupportsFloat
    SupportsComplex = typing.SupportsComplex
    SupportsBytes = typing.SupportsBytes
    SupportsIndex = typing.SupportsIndex
    SupportsAbs = typing.SupportsAbs
    SupportsRound = typing.SupportsRound
else:
    @runtime_checkable
    class SupportsInt(Protocol):
        """An ABC with one abstract method __int__."""

        __slots__ = ()

        @abc.abstractmethod
        def __int__(self) -> int:
            pass

    @runtime_checkable
    class SupportsFloat(Protocol):
        """An ABC with one abstract method __float__."""

        __slots__ = ()

        @abc.abstractmethod
        def __float__(self) -> float:
            pass

    @runtime_checkable
    class SupportsComplex(Protocol):
        """An ABC with one abstract method __complex__."""

        __slots__ = ()

        @abc.abstractmethod
        def __complex__(self) -> complex:
            pass

    @runtime_checkable
    class SupportsBytes(Protocol):
        """An ABC with one abstract method __bytes__."""

        __slots__ = ()

        @abc.abstractmethod
        def __bytes__(self) -> bytes:
            pass

    # Protocol with one abstract method, __index__ (deliberately left
    # without a docstring, as in the original definition).
    @runtime_checkable
    class SupportsIndex(Protocol):
        __slots__ = ()

        @abc.abstractmethod
        def __index__(self) -> int:
            pass

    @runtime_checkable
    class SupportsAbs(Protocol[T_co]):
        """
        An ABC with one abstract method __abs__ that is covariant in its return type.
        """

        __slots__ = ()

        @abc.abstractmethod
        def __abs__(self) -> T_co:
            pass

    @runtime_checkable
    class SupportsRound(Protocol[T_co]):
        """
        An ABC with one abstract method __round__ that is covariant in its return type.
        """

        __slots__ = ()

        @abc.abstractmethod
        def __round__(self, ndigits: int = 0) -> T_co:
            pass
def _ensure_subclassable(mro_entries):
def inner(func):
if sys.implementation.name == "pypy" and sys.version_info < (3, 9):
cls_dict = {
"__call__": staticmethod(func),
"__mro_entries__": staticmethod(mro_entries)
}
t = type(func.__name__, (), cls_dict)
return functools.update_wrapper(t(), func)
else:
func.__mro_entries__ = mro_entries
return func
return inner
# Update this to something like >=3.13.0b1 if and when
# PEP 728 is implemented in CPython
# While this flag is False, the `else` branch below is the one that runs,
# i.e. TypedDict is always the implementation defined in this module.
_PEP_728_IMPLEMENTED = False
if _PEP_728_IMPLEMENTED:
# The standard library TypedDict in Python 3.8 does not store runtime information
# about which (if any) keys are optional. See https://bugs.python.org/issue38834
# The standard library TypedDict in Python 3.9.0/1 does not honour the "total"
# keyword with old-style TypedDict(). See https://bugs.python.org/issue42059
# The standard library TypedDict below Python 3.11 does not store runtime
# information about optional and required keys when using Required or NotRequired.
# Generic TypedDicts are also impossible using typing.TypedDict on Python <3.11.
# Aaaand on 3.12 we add __orig_bases__ to TypedDict
# to enable better runtime introspection.
# On 3.13 we deprecate some odd ways of creating TypedDicts.
# Also on 3.13, PEP 705 adds the ReadOnly[] qualifier.
# PEP 728 (still pending) makes more changes.
TypedDict = typing.TypedDict
_TypedDictMeta = typing._TypedDictMeta
is_typeddict = typing.is_typeddict
else:
# True when typing._type_check accepts a `module` keyword argument
# (3.10.0 and later).
_TAKES_MODULE = "module" in inspect.signature(typing._type_check).parameters
def _get_typeddict_qualifiers(annotation_type):
    """Yield the Required / NotRequired / ReadOnly qualifiers wrapping a type.

    The annotation is unwrapped from the outside in: Annotated layers are
    looked through (via their first argument), each qualifier layer is
    yielded and removed, and iteration ends at the first layer that is
    neither.
    """
    while True:
        annotation_origin = get_origin(annotation_type)
        if annotation_origin is Annotated:
            annotation_args = get_args(annotation_type)
            if not annotation_args:
                break
            annotation_type = annotation_args[0]
        elif any(
            annotation_origin is qualifier
            for qualifier in (Required, NotRequired, ReadOnly)
        ):
            yield annotation_origin
            # A qualifier takes exactly one argument: the wrapped type.
            annotation_type, = get_args(annotation_type)
        else:
            break
class _TypedDictMeta(type):
def __new__(cls, name, bases, ns, *, total=True, closed=False):
"""Create new typed dict class object.
This method is called when TypedDict is subclassed,
or when TypedDict is instantiated. This way
TypedDict supports all three syntax forms described in its docstring.
Subclasses and instances of TypedDict return actual dictionaries.
"""
for base in bases:
if type(base) is not _TypedDictMeta and base is not typing.Generic:
raise TypeError('cannot inherit from both a TypedDict type '
'and a non-TypedDict base class')
if any(issubclass(b, typing.Generic) for b in bases):
generic_base = (typing.Generic,)
else:
generic_base = ()
# typing.py generally doesn't let you inherit from plain Generic, unless
# the name of the class happens to be "Protocol"
tp_dict = type.__new__(_TypedDictMeta, "Protocol", (*generic_base, dict), ns)
tp_dict.__name__ = name
if tp_dict.__qualname__ == "Protocol":
tp_dict.__qualname__ = name
if not hasattr(tp_dict, '__orig_bases__'):
tp_dict.__orig_bases__ = bases
annotations = {}
if "__annotations__" in ns:
own_annotations = ns["__annotations__"]
elif "__annotate__" in ns:
# TODO: Use inspect.VALUE here, and make the annotations lazily evaluated
own_annotations = ns["__annotate__"](1)
else:
own_annotations = {}
msg = "TypedDict('Name', {f0: t0, f1: t1, ...}); each t must be a type"
if _TAKES_MODULE:
own_annotations = {
n: typing._type_check(tp, msg, module=tp_dict.__module__)
for n, tp in own_annotations.items()
}
else:
own_annotations = {
n: typing._type_check(tp, msg)
for n, tp in own_annotations.items()
}
required_keys = set()
optional_keys = set()
readonly_keys = set()
mutable_keys = set()
extra_items_type = None
for base in bases:
base_dict = base.__dict__
annotations.update(base_dict.get('__annotations__', {}))
required_keys.update(base_dict.get('__required_keys__', ()))
optional_keys.update(base_dict.get('__optional_keys__', ()))
readonly_keys.update(base_dict.get('__readonly_keys__', ()))
mutable_keys.update(base_dict.get('__mutable_keys__', ()))
base_extra_items_type = base_dict.get('__extra_items__', None)
if base_extra_items_type is not None:
extra_items_type = base_extra_items_type
if closed and extra_items_type is None:
extra_items_type = Never
if closed and "__extra_items__" in own_annotations:
annotation_type = own_annotations.pop("__extra_items__")
qualifiers = set(_get_typeddict_qualifiers(annotation_type))
if Required in qualifiers:
raise TypeError(
"Special key __extra_items__ does not support "
"Required"
)
if NotRequired in qualifiers:
raise TypeError(
"Special key __extra_items__ does not support "
"NotRequired"
)
extra_items_type = annotation_type
annotations.update(own_annotations)
for annotation_key, annotation_type in own_annotations.items():
qualifiers = set(_get_typeddict_qualifiers(annotation_type))
if Required in qualifiers:
required_keys.add(annotation_key)
elif NotRequired in qualifiers:
optional_keys.add(annotation_key)
elif total:
required_keys.add(annotation_key)
else:
optional_keys.add(annotation_key)
if ReadOnly in qualifiers:
mutable_keys.discard(annotation_key)
readonly_keys.add(annotation_key)
else:
mutable_keys.add(annotation_key)
readonly_keys.discard(annotation_key)
tp_dict.__annotations__ = annotations
tp_dict.__required_keys__ = frozenset(required_keys)
tp_dict.__optional_keys__ = frozenset(optional_keys)
tp_dict.__readonly_keys__ = frozenset(readonly_keys)
tp_dict.__mutable_keys__ = frozenset(mutable_keys)
if not hasattr(tp_dict, '__total__'):
tp_dict.__total__ = total
tp_dict.__closed__ = closed
tp_dict.__extra_items__ = extra_items_type
return tp_dict
__call__ = dict # static method
def __subclasscheck__(cls, other):
# Typed dicts are only for static structural subtyping.
raise TypeError('TypedDict does not support instance and class checks')
__instancecheck__ = __subclasscheck__
_TypedDict = type.__new__(_TypedDictMeta, 'TypedDict', (), {})


@_ensure_subclassable(lambda bases: (_TypedDict,))
def TypedDict(typename, fields=_marker, /, *, total=True, closed=False, **kwargs):
    """A simple typed namespace. At runtime it is equivalent to a plain dict.

    TypedDict creates a dictionary type such that a type checker will expect all
    instances to have a certain set of keys, where each key is
    associated with a value of a consistent type. This expectation
    is not checked at runtime.

    Usage::

        class Point2D(TypedDict):
            x: int
            y: int
            label: str

        a: Point2D = {'x': 1, 'y': 2, 'label': 'good'}  # OK
        b: Point2D = {'z': 3, 'label': 'bad'}           # Fails type check

        assert Point2D(x=1, y=2, label='first') == dict(x=1, y=2, label='first')

    The type info can be accessed via the Point2D.__annotations__ dict, and
    the Point2D.__required_keys__ and Point2D.__optional_keys__ frozensets.
    TypedDict supports an additional equivalent form::

        Point2D = TypedDict('Point2D', {'x': int, 'y': int, 'label': str})

    By default, all keys must be present in a TypedDict. It is possible
    to override this by specifying totality::

        class Point2D(TypedDict, total=False):
            x: int
            y: int

    This means that a Point2D TypedDict can have any of the keys omitted. A type
    checker is only expected to support a literal False or True as the value of
    the total argument. True is the default, and makes all items defined in the
    class body be required.

    The Required and NotRequired special forms can also be used to mark
    individual keys as being required or not required::

        class Point2D(TypedDict):
            x: int  # the "x" key must always be present (Required is the default)
            y: NotRequired[int]  # the "y" key can be omitted

    See PEP 655 for more details on Required and NotRequired.
    """
    fields_omitted = fields is _marker
    if fields_omitted or fields is None:
        if fields_omitted:
            deprecated_thing = "Failing to pass a value for the 'fields' parameter"
        else:
            deprecated_thing = "Passing `None` as the 'fields' parameter"

        example = f"`{typename} = TypedDict({typename!r}, {{}})`"
        deprecation_msg = (
            f"{deprecated_thing} is deprecated and will be disallowed in "
            "Python 3.15. To create a TypedDict class with 0 fields "
            "using the functional syntax, pass an empty dictionary, e.g. "
        ) + example + "."
        warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
        if closed is not False and closed is not True:
            # A non-boolean `closed` is taken to be a field named "closed"
            # passed through the keyword-argument syntax.
            kwargs["closed"] = closed
            closed = False
        fields = kwargs
    elif kwargs:
        raise TypeError("TypedDict takes either a dict or keyword arguments,"
                        " but not both")

    if kwargs:
        if sys.version_info >= (3, 13):
            raise TypeError("TypedDict takes no keyword arguments")
        warnings.warn(
            "The kwargs-based syntax for TypedDict definitions is deprecated "
            "in Python 3.11, will be removed in Python 3.13, and may not be "
            "understood by third-party type checkers.",
            DeprecationWarning,
            stacklevel=2,
        )

    namespace = {'__annotations__': dict(fields)}
    caller_module = _caller()
    if caller_module is not None:
        # Setting correct module is necessary to make typed dict classes pickleable.
        namespace['__module__'] = caller_module

    typed_dict = _TypedDictMeta(typename, (), namespace, total=total, closed=closed)
    typed_dict.__orig_bases__ = (TypedDict,)
    return typed_dict
# Metaclasses whose instances count as TypedDict classes: the one defined in
# this module, preceded by typing's own when typing provides it.
_TYPEDDICT_TYPES = (_TypedDictMeta,)
if hasattr(typing, "_TypedDictMeta"):
    _TYPEDDICT_TYPES = (typing._TypedDictMeta, *_TYPEDDICT_TYPES)


def is_typeddict(tp):
    """Check if an annotation is a TypedDict class

    For example::

        class Film(TypedDict):
            title: str
            year: int

        is_typeddict(Film)  # => True
        is_typeddict(Union[list, str])  # => False
    """
    # On 3.8, this would otherwise return True
    is_stdlib_factory = hasattr(typing, "TypedDict") and tp is typing.TypedDict
    return (not is_stdlib_factory) and isinstance(tp, _TYPEDDICT_TYPES)
if not hasattr(typing, "assert_type"):
    def assert_type(val, typ, /):
        """Assert (to the type checker) that the value is of the given type.

        When the type checker encounters a call to assert_type(), it
        emits an error if the value is not of the specified type::

            def greet(name: str) -> None:
                assert_type(name, str)  # ok
                assert_type(name, int)  # type checker error

        At runtime this returns the first argument unchanged and otherwise
        does nothing.
        """
        return val
else:
    assert_type = typing.assert_type
if hasattr(typing, "ReadOnly"):  # 3.13+
    get_type_hints = typing.get_type_hints
else:  # typing has no ReadOnly, i.e. before 3.13
    # replaces _strip_annotations()
    def _strip_extras(t):
        """Strip Annotated, Required, NotRequired and ReadOnly from a given type."""
        if isinstance(t, _AnnotatedAlias):
            return _strip_extras(t.__origin__)
        if hasattr(t, "__origin__") and t.__origin__ in (Required, NotRequired, ReadOnly):
            return _strip_extras(t.__args__[0])

        # For the container-like aliases below, strip the arguments
        # recursively and rebuild the alias only if something changed.
        if isinstance(t, typing._GenericAlias):
            stripped_args = tuple(_strip_extras(a) for a in t.__args__)
            return t if stripped_args == t.__args__ else t.copy_with(stripped_args)
        if hasattr(_types, "GenericAlias") and isinstance(t, _types.GenericAlias):
            stripped_args = tuple(_strip_extras(a) for a in t.__args__)
            if stripped_args == t.__args__:
                return t
            return _types.GenericAlias(t.__origin__, stripped_args)
        if hasattr(_types, "UnionType") and isinstance(t, _types.UnionType):
            stripped_args = tuple(_strip_extras(a) for a in t.__args__)
            if stripped_args == t.__args__:
                return t
            return functools.reduce(operator.or_, stripped_args)

        return t

    def get_type_hints(obj, globalns=None, localns=None, include_extras=False):
        """Return type hints for an object.

        This is often the same as obj.__annotations__, but it handles
        forward references encoded as string literals, adds Optional[t] if a
        default value equal to None is set and recursively replaces all
        'Annotated[T, ...]', 'Required[T]' or 'NotRequired[T]' with 'T'
        (unless 'include_extras=True').

        The argument may be a module, class, method, or function. The annotations
        are returned as a dictionary. For classes, annotations include also
        inherited members.

        TypeError is raised if the argument is not of a type that can contain
        annotations, and an empty dictionary is returned if no annotations are
        present.

        BEWARE -- the behavior of globalns and localns is counterintuitive
        (unless you are familiar with how eval() and exec() work).  The
        search order is locals first, then globals.

        - If no dict arguments are passed, an attempt is made to use the
          globals from obj (or the respective module's globals for classes),
          and these are also used as the locals.  If the object does not appear
          to have globals, an empty dictionary is used.

        - If one dict argument is passed, it is used for both globals and
          locals.

        - If two dict arguments are passed, they specify globals and
          locals, respectively.
        """
        if hasattr(typing, "Annotated"):  # 3.9+
            # Always ask typing to keep the extras; they are stripped below
            # by this module's own helper when they are not wanted.
            hint = typing.get_type_hints(
                obj, globalns=globalns, localns=localns, include_extras=True
            )
        else:  # 3.8
            hint = typing.get_type_hints(obj, globalns=globalns, localns=localns)
        if include_extras:
            return hint
        return {name: _strip_extras(tp) for name, tp in hint.items()}
# Python 3.9+ has PEP 593 (Annotated)
if hasattr(typing, 'Annotated'):
Annotated = typing.Annotated
# Not exported and not a public API, but needed for get_origin() and get_args()
# to work.
_AnnotatedAlias = typing._AnnotatedAlias
# 3.8
else:
class _AnnotatedAlias(typing._GenericAlias, _root=True):
"""Runtime representation of an annotated type.
At its core 'Annotated[t, dec1, dec2, ...]' is an alias for the type 't'
with extra annotations. The alias behaves like a normal typing alias,
instantiating is the same as instantiating the underlying type, binding
it to types is also the same.
"""
def __init__(self, origin, metadata):
if isinstance(origin, _AnnotatedAlias):
metadata = origin.__metadata__ + metadata
origin = origin.__origin__
super().__init__(origin, origin)
self.__metadata__ = metadata
def copy_with(self, params):
assert len(params) == 1
new_type = params[0]
return _AnnotatedAlias(new_type, self.__metadata__)
def __repr__(self):
return (f"typing_extensions.Annotated[{typing._type_repr(self.__origin__)}, "
f"{', '.join(repr(a) for a in self.__metadata__)}]")
def __reduce__(self):
return operator.getitem, (
Annotated, (self.__origin__, *self.__metadata__)
)
def __eq__(self, other):
if not isinstance(other, _AnnotatedAlias):
return NotImplemented
if self.__origin__ != other.__origin__:
return False
return self.__metadata__ == other.__metadata__
def __hash__(self):
return hash((self.__origin__, self.__metadata__))
class Annotated:
    """Add context specific metadata to a type.

    Example: Annotated[int, runtime_check.Unsigned] indicates to the
    hypothetical runtime_check module that this type is an unsigned int.
    Every other consumer of this type can ignore this metadata and treat
    this type as int.

    The first argument to Annotated must be a valid type (and will be in
    the __origin__ field), the remaining arguments are kept as a tuple in
    the __metadata__ field.

    Details:

    - It's an error to call `Annotated` with less than two arguments.
    - Nested Annotated are flattened::

        Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3]

    - Instantiating an annotated type is equivalent to instantiating the
      underlying type::

        Annotated[C, Ann1](5) == C(5)

    - Annotated can be used as a generic type alias::

        Optimized = Annotated[T, runtime.Optimize()]
        Optimized[int] == Annotated[int, runtime.Optimize()]

        OptimizedList = Annotated[List[T], runtime.Optimize()]
        OptimizedList[int] == Annotated[List[int], runtime.Optimize()]
    """

    __slots__ = ()

    def __new__(cls, *args, **kwargs):
        # Annotated is only meant to be subscripted, never instantiated.
        raise TypeError("Type Annotated cannot be instantiated.")

    @typing._tp_cache
    def __class_getitem__(cls, params):
        """Build the annotated alias for ``Annotated[type, *metadata]``.

        Raises TypeError unless ``params`` is a tuple of at least two items.
        """
        if not isinstance(params, tuple) or len(params) < 2:
            raise TypeError("Annotated[...] should be used "
                            "with at least two arguments (a type and an "
                            "annotation).")
        # ClassVar[...] and Final[...] are accepted as-is, bypassing the
        # type check applied to every other first argument.
        allowed_special_forms = (ClassVar, Final)
        if get_origin(params[0]) in allowed_special_forms:
            origin = params[0]
        else:
            msg = "Annotated[t, ...]: t must be a type."
            origin = typing._type_check(params[0], msg)
        metadata = tuple(params[1:])
        return _AnnotatedAlias(origin, metadata)

    def __init_subclass__(cls, *args, **kwargs):
        raise TypeError(
            f"Cannot subclass {cls.__module__}.Annotated"
        )
# Python 3.8 has get_origin() and get_args() but those implementations aren't
# Annotated-aware, so we can't use those. Python 3.9's versions don't support
# ParamSpecArgs and ParamSpecKwargs, so only Python 3.10's versions will do.
if sys.version_info[:2] >= (3, 10):
get_origin = typing.get_origin
get_args = typing.get_args
# 3.8-3.9
else:
try:
# 3.9+
from typing import _BaseGenericAlias
except ImportError:
_BaseGenericAlias = typing._GenericAlias
try:
# 3.9+
from typing import GenericAlias as _typing_GenericAlias
except ImportError:
_typing_GenericAlias = typing._GenericAlias
def get_origin(tp):
"""Get the unsubscripted version of a type.
This supports generic types, Callable, Tuple, Union, Literal, Final, ClassVar
and Annotated. Return None for unsupported types. Examples::
get_origin(Literal[42]) is Literal
get_origin(int) is None
get_origin(ClassVar[int]) is ClassVar
get_origin(Generic) is Generic
get_origin(Generic[T]) is Generic
get_origin(Union[T, int]) is Union
get_origin(List[Tuple[T, T]][int]) == list
get_origin(P.args) is P
"""
if isinstance(tp, _AnnotatedAlias):
return Annotated
if isinstance(tp, (typing._GenericAlias, _typing_GenericAlias, _BaseGenericAlias,
ParamSpecArgs, ParamSpecKwargs)):
return tp.__origin__
if tp is typing.Generic:
return typing.Generic
return None
def get_args(tp):
"""Get type arguments with all substitutions performed.
For unions, basic simplifications used by Union constructor are performed.
Examples::
get_args(Dict[str, int]) == (str, int)
get_args(int) == ()
get_args(Union[int, Union[T, int], str][int]) == (int, str)
get_args(Union[int, Tuple[T, int]][str]) == (int, Tuple[str, int])
get_args(Callable[[], T][int]) == ([], int)
"""
if isinstance(tp, _AnnotatedAlias):
return (tp.__origin__, *tp.__metadata__)
if isinstance(tp, (typing._GenericAlias, _typing_GenericAlias)):
if getattr(tp, "_special", False):
return ()
res = tp.__args__
if get_origin(tp) is collections.abc.Callable and res[0] is not Ellipsis:
res = (list(res[:-1]), res[-1])
return res
return ()
# 3.10+
if hasattr(typing, 'TypeAlias'):
TypeAlias = typing.TypeAlias
# 3.9
elif sys.version_info[:2] >= (3, 9):
@_ExtensionsSpecialForm
def TypeAlias(self, parameters):
"""Special marker indicating that an assignment should
be recognized as a proper type alias definition by type
checkers.
For example::
Predicate: TypeAlias = Callable[..., bool]
It's invalid when used anywhere except as in the example above.
"""
raise TypeError(f"{self} is not subscriptable")
# 3.8
else:
TypeAlias = _ExtensionsSpecialForm(
'TypeAlias',
doc="""Special marker indicating that an assignment should
be recognized as a proper type alias definition by type
checkers.
For example::
Predicate: TypeAlias = Callable[..., bool]
It's invalid when used anywhere except as in the example
above."""
)
if hasattr(typing, "NoDefault"):
    NoDefault = typing.NoDefault
else:
    class NoDefaultTypeMeta(type):
        """Metaclass that rejects attribute assignment on the class."""

        def __setattr__(cls, attr, value):
            # TypeError is consistent with the behavior of NoneType
            raise TypeError(
                f"cannot set {attr!r} attribute of immutable type {cls.__name__!r}"
            )

    class NoDefaultType(metaclass=NoDefaultTypeMeta):
        """The type of the NoDefault singleton."""

        __slots__ = ()

        def __new__(cls):
            # Hand back the module-level singleton once it exists.
            existing = globals().get("NoDefault")
            return existing or object.__new__(cls)

        def __repr__(self):
            return "typing_extensions.NoDefault"

        def __reduce__(self):
            return "NoDefault"

    NoDefault = NoDefaultType()
    # Only the singleton stays in the module namespace.
    del NoDefaultType, NoDefaultTypeMeta
def _set_default(type_param, default):
type_param.has_default = lambda: default is not NoDefault
type_param.__default__ = default
def _set_module(typevarlike):
    """Record the defining module on ``typevarlike`` (used for pickling).

    The module name comes from ``_caller(depth=3)``; it is left untouched
    when that name is ``'typing_extensions'``.
    """
    defining_module = _caller(depth=3)
    if defining_module != 'typing_extensions':
        typevarlike.__module__ = defining_module
class _DefaultMixin:
    """Mixin giving TypeVar-like classes PEP 696 default handling."""

    __slots__ = ()

    # Instances are initialised directly by _set_default(self, default).
    __init__ = _set_default
# Classes using this metaclass must provide a _backported_typevarlike ClassVar
class _TypeVarLikeMeta(type):
def __instancecheck__(cls, __instance: Any) -> bool:
return isinstance(__instance, cls._backported_typevarlike)
if _PEP_696_IMPLEMENTED:
    from typing import TypeVar
else:
    # Add default and infer_variance parameters from PEP 696 and 695
    class TypeVar(metaclass=_TypeVarLikeMeta):
        """Type variable."""

        _backported_typevarlike = typing.TypeVar

        def __new__(cls, name, *constraints, bound=None,
                    covariant=False, contravariant=False,
                    default=NoDefault, infer_variance=False):
            native_kwargs = {
                "bound": bound,
                "covariant": covariant,
                "contravariant": contravariant,
            }
            # PEP 695 implemented (3.12+), can pass infer_variance to typing.TypeVar
            native_infer_variance = hasattr(typing, "TypeAliasType")
            if native_infer_variance:
                native_kwargs["infer_variance"] = infer_variance
            typevar = typing.TypeVar(name, *constraints, **native_kwargs)
            if not native_infer_variance:
                # Without native support, validate and record the flag here.
                if infer_variance and (covariant or contravariant):
                    raise ValueError("Variance cannot be specified with infer_variance.")
                typevar.__infer_variance__ = infer_variance

            _set_default(typevar, default)
            _set_module(typevar)

            def _tvar_prepare_subst(alias, args):
                # Append the default when this TypeVar is the first
                # parameter left without an argument.
                missing_here = alias.__parameters__.index(typevar) == len(args)
                if typevar.has_default() and missing_here:
                    args += (typevar.__default__,)
                return args

            typevar.__typing_prepare_subst__ = _tvar_prepare_subst
            return typevar

        def __init_subclass__(cls) -> None:
            raise TypeError(f"type '{__name__}.TypeVar' is not an acceptable base type")
# PEP 612 classes ship with typing from Python 3.10 on.
if hasattr(typing, 'ParamSpecArgs'):
    ParamSpecArgs = typing.ParamSpecArgs
    ParamSpecKwargs = typing.ParamSpecKwargs
# 3.8-3.9
else:
    class _Immutable:
        """Mixin to indicate that object should not be copied."""

        __slots__ = ()

        def __copy__(self):
            return self

        def __deepcopy__(self, memo):
            return self

    class ParamSpecArgs(_Immutable):
        """The args for a ParamSpec object.

        Given a ParamSpec object P, P.args is an instance of ParamSpecArgs.

        ParamSpecArgs objects have a reference back to their ParamSpec:

        P.args.__origin__ is P

        This type is meant for runtime introspection and has no special meaning to
        static type checkers.
        """

        def __init__(self, origin):
            self.__origin__ = origin

        def __repr__(self):
            owner_name = self.__origin__.__name__
            return f"{owner_name}.args"

        def __eq__(self, other):
            if isinstance(other, ParamSpecArgs):
                return self.__origin__ == other.__origin__
            return NotImplemented

    class ParamSpecKwargs(_Immutable):
        """The kwargs for a ParamSpec object.

        Given a ParamSpec object P, P.kwargs is an instance of ParamSpecKwargs.

        ParamSpecKwargs objects have a reference back to their ParamSpec:

        P.kwargs.__origin__ is P

        This type is meant for runtime introspection and has no special meaning to
        static type checkers.
        """

        def __init__(self, origin):
            self.__origin__ = origin

        def __repr__(self):
            owner_name = self.__origin__.__name__
            return f"{owner_name}.kwargs"

        def __eq__(self, other):
            if isinstance(other, ParamSpecKwargs):
                return self.__origin__ == other.__origin__
            return NotImplemented
if _PEP_696_IMPLEMENTED:
    from typing import ParamSpec

# 3.10+
elif hasattr(typing, 'ParamSpec'):

    # Add default parameter - PEP 696
    class ParamSpec(metaclass=_TypeVarLikeMeta):
        """Parameter specification."""

        _backported_typevarlike = typing.ParamSpec

        def __new__(cls, name, *, bound=None,
                    covariant=False, contravariant=False,
                    infer_variance=False, default=NoDefault):
            if hasattr(typing, "TypeAliasType"):
                # PEP 695 implemented, can pass infer_variance to typing.ParamSpec
                paramspec = typing.ParamSpec(name, bound=bound,
                                             covariant=covariant,
                                             contravariant=contravariant,
                                             infer_variance=infer_variance)
            else:
                paramspec = typing.ParamSpec(name, bound=bound,
                                             covariant=covariant,
                                             contravariant=contravariant)
                paramspec.__infer_variance__ = infer_variance

            _set_default(paramspec, default)
            _set_module(paramspec)

            def _paramspec_prepare_subst(alias, args):
                params = alias.__parameters__
                i = params.index(paramspec)
                # Fill in the default when this ParamSpec is the first
                # parameter left without an argument.
                if i == len(args) and paramspec.has_default():
                    args = [*args, paramspec.__default__]
                if i >= len(args):
                    raise TypeError(f"Too few arguments for {alias}")
                # Special case where Z[[int, str, bool]] == Z[int, str, bool] in PEP 612.
                if len(params) == 1 and not typing._is_param_expr(args[0]):
                    assert i == 0
                    args = (args,)
                # Convert lists to tuples to help other libraries cache the results.
                elif isinstance(args[i], list):
                    args = (*args[:i], tuple(args[i]), *args[i + 1:])
                return args

            paramspec.__typing_prepare_subst__ = _paramspec_prepare_subst
            return paramspec

        def __init_subclass__(cls) -> None:
            raise TypeError(f"type '{__name__}.ParamSpec' is not an acceptable base type")

# 3.8-3.9
else:

    # Inherits from list as a workaround for Callable checks in Python < 3.9.2.
    class ParamSpec(list, _DefaultMixin):
        """Parameter specification variable.

        Usage::

           P = ParamSpec('P')

        Parameter specification variables exist primarily for the benefit of static
        type checkers.  They are used to forward the parameter types of one
        callable to another callable, a pattern commonly found in higher order
        functions and decorators.  They are only valid when used in ``Concatenate``,
        or as the first argument to ``Callable``. In Python 3.10 and higher,
        they are also supported in user-defined Generics at runtime.
        See class Generic for more information on generic types.  An
        example for annotating a decorator::

           T = TypeVar('T')
           P = ParamSpec('P')

           def add_logging(f: Callable[P, T]) -> Callable[P, T]:
               '''A type-safe decorator to add logging to a function.'''
               def inner(*args: P.args, **kwargs: P.kwargs) -> T:
                   logging.info(f'{f.__name__} was called')
                   return f(*args, **kwargs)
               return inner

           @add_logging
           def add_two(x: float, y: float) -> float:
               '''Add two numbers together.'''
               return x + y

        Parameter specification variables defined with covariant=True or
        contravariant=True can be used to declare covariant or contravariant
        generic types.  These keyword arguments are valid, but their actual semantics
        are yet to be decided.  See PEP 612 for details.

        Parameter specification variables can be introspected. e.g.:

           P.__name__ == 'P'
           P.__bound__ == None
           P.__covariant__ == False
           P.__contravariant__ == False

        Note that only parameter specification variables defined in global scope can
        be pickled.
        """

        # Trick Generic __parameters__.
        __class__ = typing.TypeVar

        @property
        def args(self):
            return ParamSpecArgs(self)

        @property
        def kwargs(self):
            return ParamSpecKwargs(self)

        def __init__(self, name, *, bound=None, covariant=False, contravariant=False,
                     infer_variance=False, default=NoDefault):
            list.__init__(self, [self])
            self.__name__ = name
            self.__covariant__ = bool(covariant)
            self.__contravariant__ = bool(contravariant)
            self.__infer_variance__ = bool(infer_variance)
            if bound:
                self.__bound__ = typing._type_check(bound, 'Bound must be a type.')
            else:
                self.__bound__ = None
            _DefaultMixin.__init__(self, default)

            # for pickling:
            def_mod = _caller()
            if def_mod != 'typing_extensions':
                self.__module__ = def_mod

        def __repr__(self):
            # No prefix for inferred variance, '+' covariant,
            # '-' contravariant, '~' otherwise.
            if self.__infer_variance__:
                prefix = ''
            elif self.__covariant__:
                prefix = '+'
            elif self.__contravariant__:
                prefix = '-'
            else:
                prefix = '~'
            return prefix + self.__name__

        # Identity-based hashing and equality, overriding what list provides.
        def __hash__(self):
            return object.__hash__(self)

        def __eq__(self, other):
            return self is other

        def __reduce__(self):
            return self.__name__

        # Hack to get typing._type_check to pass.
        def __call__(self, *args, **kwargs):
            pass
# 3.8-3.9
if not hasattr(typing, 'Concatenate'):
    # Inherits from list as a workaround for Callable checks in Python < 3.9.2.
    class _ConcatenateGenericAlias(list):
        """Alias object returned when ``Concatenate`` is subscripted."""

        # Trick Generic into looking into this for __parameters__.
        __class__ = typing._GenericAlias

        # Flag in 3.8.
        _special = False

        def __init__(self, origin, args):
            super().__init__(args)
            self.__origin__ = origin
            self.__args__ = args

        def __repr__(self):
            _type_repr = typing._type_repr
            origin_text = _type_repr(self.__origin__)
            args_text = ", ".join(_type_repr(arg) for arg in self.__args__)
            return f'{origin_text}[{args_text}]'

        def __hash__(self):
            return hash((self.__origin__, self.__args__))

        # Hack to get typing._type_check to pass in Generic.
        def __call__(self, *args, **kwargs):
            pass

        @property
        def __parameters__(self):
            # The arguments that are TypeVar or ParamSpec instances.
            variable_types = (typing.TypeVar, ParamSpec)
            return tuple(
                arg for arg in self.__args__ if isinstance(arg, variable_types)
            )
# 3.8-3.9
@typing._tp_cache
def _concatenate_getitem(self, parameters):
    """Validate ``Concatenate[...]`` arguments and build the alias.

    Raises TypeError for an empty subscription or when the last argument
    is not a ParamSpec; every argument also goes through
    ``typing._type_check``.
    """
    if parameters == ():
        raise TypeError("Cannot take a Concatenate of no types.")
    if not isinstance(parameters, tuple):
        parameters = (parameters,)
    last = parameters[-1]
    if not isinstance(last, ParamSpec):
        raise TypeError("The last parameter to Concatenate should be a "
                        "ParamSpec variable.")
    msg = "Concatenate[arg, ...]: each arg must be a type."
    checked = []
    for param in parameters:
        checked.append(typing._type_check(param, msg))
    return _ConcatenateGenericAlias(self, tuple(checked))
# 3.10+
if hasattr(typing, 'Concatenate'):
Concatenate = typing.Concatenate
_ConcatenateGenericAlias = typing._ConcatenateGenericAlias
# 3.9
elif sys.version_info[:2] >= (3, 9):
@_ExtensionsSpecialForm
def Concatenate(self, parameters):
"""Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a
higher order function which adds, removes or transforms parameters of a
callable.
For example::
Callable[Concatenate[int, P], int]
See PEP 612 for detailed information.
"""
return _concatenate_getitem(self, parameters)
# 3.8
else:
class _ConcatenateForm(_ExtensionsSpecialForm, _root=True):
def __getitem__(self, parameters):
return _concatenate_getitem(self, parameters)
Concatenate = _ConcatenateForm(
'Concatenate',
doc="""Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a
higher order function which adds, removes or transforms parameters of a
callable.
For example::
Callable[Concatenate[int, P], int]
See PEP 612 for detailed information.
""")
# 3.10+
if hasattr(typing, 'TypeGuard'):
TypeGuard = typing.TypeGuard
# 3.9
elif sys.version_info[:2] >= (3, 9):
@_ExtensionsSpecialForm
def TypeGuard(self, parameters):
"""Special typing form used to annotate the return type of a user-defined
type guard function. ``TypeGuard`` only accepts a single type argument.
At runtime, functions marked this way should return a boolean.
``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static
type checkers to determine a more precise type of an expression within a
program's code flow. Usually type narrowing is done by analyzing
conditional code flow and applying the narrowing to a block of code. The
conditional expression here is sometimes referred to as a "type guard".
Sometimes it would be convenient to use a user-defined boolean function
as a type guard. Such a function should use ``TypeGuard[...]`` as its
return type to alert static type checkers to this intention.
Using ``-> TypeGuard`` tells the static type checker that for a given
function:
1. The return value is a boolean.
2. If the return value is ``True``, the type of its argument
is the type inside ``TypeGuard``.
For example::
def is_str(val: Union[str, float]):
# "isinstance" type guard
if isinstance(val, str):
# Type of ``val`` is narrowed to ``str``
...
else:
# Else, type of ``val`` is narrowed to ``float``.
...
Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower
form of ``TypeA`` (it can even be a wider form) and this may lead to
type-unsafe results. The main reason is to allow for things like
narrowing ``List[object]`` to ``List[str]`` even though the latter is not
a subtype of the former, since ``List`` is invariant. The responsibility of
writing type-safe type guards is left to the user.
``TypeGuard`` also works with type variables. For more information, see
PEP 647 (User-Defined Type Guards).
"""
item = typing._type_check(parameters, f'{self} accepts only a single type.')
return typing._GenericAlias(self, (item,))
# 3.8
else:
class _TypeGuardForm(_ExtensionsSpecialForm, _root=True):
def __getitem__(self, parameters):
item = typing._type_check(parameters,
f'{self._name} accepts only a single type')
return typing._GenericAlias(self, (item,))
TypeGuard = _TypeGuardForm(
'TypeGuard',
doc="""Special typing form used to annotate the return type of a user-defined
type guard function. ``TypeGuard`` only accepts a single type argument.
At runtime, functions marked this way should return a boolean.
``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static
type checkers to determine a more precise type of an expression within a
program's code flow. Usually type narrowing is done by analyzing
conditional code flow and applying the narrowing to a block of code. The
conditional expression here is sometimes referred to as a "type guard".
Sometimes it would be convenient to use a user-defined boolean function
as a type guard. Such a function should use ``TypeGuard[...]`` as its
return type to alert static type checkers to this intention.
Using ``-> TypeGuard`` tells the static type checker that for a given
function:
1. The return value is a boolean.
2. If the return value is ``True``, the type of its argument
is the type inside ``TypeGuard``.
For example::
def is_str(val: Union[str, float]):
# "isinstance" type guard
if isinstance(val, str):
# Type of ``val`` is narrowed to ``str``
...
else:
# Else, type of ``val`` is narrowed to ``float``.
...
Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower
form of ``TypeA`` (it can even be a wider form) and this may lead to
type-unsafe results. The main reason is to allow for things like
narrowing ``List[object]`` to ``List[str]`` even though the latter is not
a subtype of the former, since ``List`` is invariant. The responsibility of
writing type-safe type guards is left to the user.
``TypeGuard`` also works with type variables. For more information, see
PEP 647 (User-Defined Type Guards).
""")
# 3.13+
if hasattr(typing, 'TypeIs'):
    TypeIs = typing.TypeIs
# 3.9-3.12
elif sys.version_info[:2] >= (3, 9):
    @_ExtensionsSpecialForm
    def TypeIs(self, parameters):
        """Special typing form used to annotate the return type of a user-defined
        type narrower function.  ``TypeIs`` only accepts a single type argument.
        At runtime, functions marked this way should return a boolean.

        ``TypeIs`` aims to benefit *type narrowing* -- a technique used by static
        type checkers to determine a more precise type of an expression within a
        program's code flow.  Usually type narrowing is done by analyzing
        conditional code flow and applying the narrowing to a block of code.  The
        conditional expression here is sometimes referred to as a "type guard".

        Sometimes it would be convenient to use a user-defined boolean function
        as a type guard.  Such a function should use ``TypeIs[...]`` as its
        return type to alert static type checkers to this intention.

        Using  ``-> TypeIs`` tells the static type checker that for a given
        function:

        1. The return value is a boolean.
        2. If the return value is ``True``, the type of its argument
        is the intersection of the type inside ``TypeIs`` and the argument's
        previously known type.

        For example::

            def is_awaitable(val: object) -> TypeIs[Awaitable[Any]]:
                return hasattr(val, '__await__')

            def f(val: Union[int, Awaitable[int]]) -> int:
                if is_awaitable(val):
                    assert_type(val, Awaitable[int])
                else:
                    assert_type(val, int)

        ``TypeIs`` also works with type variables.  For more information, see
        PEP 742 (Narrowing types with TypeIs).
        """
        item = typing._type_check(parameters, f'{self} accepts only a single type.')
        return typing._GenericAlias(self, (item,))
# 3.8
else:
    class _TypeIsForm(_ExtensionsSpecialForm, _root=True):
        def __getitem__(self, parameters):
            item = typing._type_check(parameters,
                                      f'{self._name} accepts only a single type')
            return typing._GenericAlias(self, (item,))

    TypeIs = _TypeIsForm(
        'TypeIs',
        doc="""Special typing form used to annotate the return type of a user-defined
type narrower function. ``TypeIs`` only accepts a single type argument.
At runtime, functions marked this way should return a boolean.
``TypeIs`` aims to benefit *type narrowing* -- a technique used by static
type checkers to determine a more precise type of an expression within a
program's code flow. Usually type narrowing is done by analyzing
conditional code flow and applying the narrowing to a block of code. The
conditional expression here is sometimes referred to as a "type guard".
Sometimes it would be convenient to use a user-defined boolean function
as a type guard. Such a function should use ``TypeIs[...]`` as its
return type to alert static type checkers to this intention.
Using ``-> TypeIs`` tells the static type checker that for a given
function:
1. The return value is a boolean.
2. If the return value is ``True``, the type of its argument
is the intersection of the type inside ``TypeIs`` and the argument's
previously known type.
For example::
def is_awaitable(val: object) -> TypeIs[Awaitable[Any]]:
return hasattr(val, '__await__')
def f(val: Union[int, Awaitable[int]]) -> int:
if is_awaitable(val):
assert_type(val, Awaitable[int])
else:
assert_type(val, int)
``TypeIs`` also works with type variables. For more information, see
PEP 742 (Narrowing types with TypeIs).
""")
# Vendored from cpython typing._SpecialFrom
class _SpecialForm(typing._Final, _root=True):
__slots__ = ('_name', '__doc__', '_getitem')
def __init__(self, getitem):
self._getitem = getitem
self._name = getitem.__name__
self.__doc__ = getitem.__doc__
def __getattr__(self, item):
if item in {'__name__', '__qualname__'}:
return self._name
raise AttributeError(item)
def __mro_entries__(self, bases):
raise TypeError(f"Cannot subclass {self!r}")
def __repr__(self):
return f'typing_extensions.{self._name}'
def __reduce__(self):
return self._name
def __call__(self, *args, **kwds):
raise TypeError(f"Cannot instantiate {self!r}")
def __or__(self, other):
return typing.Union[self, other]
def __ror__(self, other):
return typing.Union[other, self]
def __instancecheck__(self, obj):
raise TypeError(f"{self} cannot be used with isinstance()")
def __subclasscheck__(self, cls):
raise TypeError(f"{self} cannot be used with issubclass()")
@typing._tp_cache
def __getitem__(self, parameters):
return self._getitem(self, parameters)
if not hasattr(typing, "LiteralString"):  # <=3.10
    @_SpecialForm
    def LiteralString(self, params):
        """Represents an arbitrary literal string.

        Example::

          from metaflow._vendor.typing_extensions import LiteralString

          def query(sql: LiteralString) -> ...:
              ...

          query("SELECT * FROM table")  # ok
          query(f"SELECT * FROM {input()}")  # not ok

        See PEP 675 for details.

        """
        raise TypeError(f"{self} is not subscriptable")
else:  # 3.11+
    LiteralString = typing.LiteralString
if not hasattr(typing, "Self"):  # <=3.10
    @_SpecialForm
    def Self(self, params):
        """Used to spell the type of "self" in classes.

        Example::

          from typing import Self

          class ReturnsSelf:
              def parse(self, data: bytes) -> Self:
                  ...
                  return self

        """
        raise TypeError(f"{self} is not subscriptable")
else:  # 3.11+
    Self = typing.Self
if not hasattr(typing, "Never"):  # <=3.10
    @_SpecialForm
    def Never(self, params):
        """The bottom type, a type that has no members.

        This can be used to define a function that should never be
        called, or a function that never returns::

            from metaflow._vendor.typing_extensions import Never

            def never_call_me(arg: Never) -> None:
                pass

            def int_or_str(arg: int | str) -> None:
                never_call_me(arg)  # type checker error
                match arg:
                    case int():
                        print("It's an int")
                    case str():
                        print("It's a str")
                    case _:
                        never_call_me(arg)  # ok, arg is of type Never

        """
        raise TypeError(f"{self} is not subscriptable")
else:  # 3.11+
    Never = typing.Never
if hasattr(typing, 'Required'):  # 3.11+
    Required = typing.Required
    NotRequired = typing.NotRequired
elif sys.version_info[:2] >= (3, 9):  # 3.9-3.10
    @_ExtensionsSpecialForm
    def Required(self, parameters):
        """A special typing construct to mark a key of a total=False TypedDict
        as required. For example:

            class Movie(TypedDict, total=False):
                title: Required[str]
                year: int

            m = Movie(
                title='The Matrix',  # typechecker error if key is omitted
                year=1999,
            )

        There is no runtime checking that a required key is actually provided
        when instantiating a related TypedDict.
        """
        checked = typing._type_check(parameters, f'{self._name} accepts only a single type.')
        return typing._GenericAlias(self, (checked,))

    @_ExtensionsSpecialForm
    def NotRequired(self, parameters):
        """A special typing construct to mark a key of a TypedDict as
        potentially missing. For example:

            class Movie(TypedDict):
                title: str
                year: NotRequired[int]

            m = Movie(
                title='The Matrix',  # typechecker error if key is omitted
                year=1999,
            )
        """
        checked = typing._type_check(parameters, f'{self._name} accepts only a single type.')
        return typing._GenericAlias(self, (checked,))

else:  # 3.8
    # One form class serves both Required and NotRequired.
    class _RequiredForm(_ExtensionsSpecialForm, _root=True):
        def __getitem__(self, parameters):
            checked = typing._type_check(parameters,
                                         f'{self._name} accepts only a single type.')
            return typing._GenericAlias(self, (checked,))

    Required = _RequiredForm(
        'Required',
        doc="""A special typing construct to mark a key of a total=False TypedDict
as required. For example:
class Movie(TypedDict, total=False):
title: Required[str]
year: int
m = Movie(
title='The Matrix', # typechecker error if key is omitted
year=1999,
)
There is no runtime checking that a required key is actually provided
when instantiating a related TypedDict.
""")
    NotRequired = _RequiredForm(
        'NotRequired',
        doc="""A special typing construct to mark a key of a TypedDict as
potentially missing. For example:
class Movie(TypedDict):
title: str
year: NotRequired[int]
m = Movie(
title='The Matrix', # typechecker error if key is omitted
year=1999,
)
""")
if hasattr(typing, 'ReadOnly'):
    ReadOnly = typing.ReadOnly
elif sys.version_info[:2] >= (3, 9):  # 3.9-3.12
    @_ExtensionsSpecialForm
    def ReadOnly(self, parameters):
        """A special typing construct to mark an item of a TypedDict as read-only.

        For example:

            class Movie(TypedDict):
                title: ReadOnly[str]
                year: int

            def mutate_movie(m: Movie) -> None:
                m["year"] = 1992  # allowed
                m["title"] = "The Matrix"  # typechecker error

        There is no runtime checking for this property.
        """
        item = typing._type_check(parameters, f'{self._name} accepts only a single type.')
        return typing._GenericAlias(self, (item,))

else:  # 3.8
    class _ReadOnlyForm(_ExtensionsSpecialForm, _root=True):
        def __getitem__(self, parameters):
            item = typing._type_check(parameters,
                                      f'{self._name} accepts only a single type.')
            return typing._GenericAlias(self, (item,))

    ReadOnly = _ReadOnlyForm(
        'ReadOnly',
        doc="""A special typing construct to mark a key of a TypedDict as read-only.
For example:
class Movie(TypedDict):
title: ReadOnly[str]
year: int
def mutate_movie(m: Movie) -> None:
m["year"] = 1992 # allowed
m["title"] = "The Matrix" # typechecker error
There is no runtime checking for this property.
""")
# Documentation text shared by the backported Unpack forms defined below.
_UNPACK_DOC = """\
Type unpack operator.
The type unpack operator takes the child types from some container type,
such as `tuple[int, str]` or a `TypeVarTuple`, and 'pulls them out'. For
example:
# For some generic class `Foo`:
Foo[Unpack[tuple[int, str]]] # Equivalent to Foo[int, str]
Ts = TypeVarTuple('Ts')
# Specifies that `Bar` is generic in an arbitrary number of types.
# (Think of `Ts` as a tuple of an arbitrary number of individual
# `TypeVar`s, which the `Unpack` is 'pulling out' directly into the
# `Generic[]`.)
class Bar(Generic[Unpack[Ts]]): ...
Bar[int] # Valid
Bar[int, str] # Also valid
From Python 3.11, this can also be done using the `*` operator:
Foo[*tuple[int, str]]
class Bar(Generic[*Ts]): ...
The operator can also be used along with a `TypedDict` to annotate
`**kwargs` in a function signature. For instance:
class Movie(TypedDict):
name: str
year: int
# This function expects two keyword arguments - *name* of type `str` and
# *year* of type `int`.
def foo(**kwargs: Unpack[Movie]): ...
Note that there is only some runtime checking of this operator. Not
everything the runtime allows may be accepted by static type checkers.
For more information, see PEP 646 and PEP 692.
"""

if sys.version_info >= (3, 12):  # PEP 692 changed the repr of Unpack[]
    Unpack = typing.Unpack

    def _is_unpack(obj):
        return get_origin(obj) is Unpack

elif sys.version_info[:2] >= (3, 9):  # 3.9+
    class _UnpackSpecialForm(_ExtensionsSpecialForm, _root=True):
        def __init__(self, getitem):
            super().__init__(getitem)
            # Replace whatever doc the base initialiser set with the shared text.
            self.__doc__ = _UNPACK_DOC

    class _UnpackAlias(typing._GenericAlias, _root=True):
        __class__ = typing.TypeVar

        @property
        def __typing_unpacked_tuple_args__(self):
            assert self.__origin__ is Unpack
            assert len(self.__args__) == 1
            arg, = self.__args__
            if not isinstance(arg, (typing._GenericAlias, _types.GenericAlias)):
                return None
            if arg.__origin__ is not tuple:
                raise TypeError("Unpack[...] must be used with a tuple type")
            return arg.__args__

    @_UnpackSpecialForm
    def Unpack(self, parameters):
        checked = typing._type_check(parameters, f'{self._name} accepts only a single type.')
        return _UnpackAlias(self, (checked,))

    def _is_unpack(obj):
        return isinstance(obj, _UnpackAlias)

else:  # 3.8
    class _UnpackAlias(typing._GenericAlias, _root=True):
        __class__ = typing.TypeVar

    class _UnpackForm(_ExtensionsSpecialForm, _root=True):
        def __getitem__(self, parameters):
            checked = typing._type_check(parameters,
                                         f'{self._name} accepts only a single type.')
            return _UnpackAlias(self, (checked,))

    Unpack = _UnpackForm('Unpack', doc=_UNPACK_DOC)

    def _is_unpack(obj):
        return isinstance(obj, _UnpackAlias)
if _PEP_696_IMPLEMENTED:
    from typing import TypeVarTuple

elif hasattr(typing, "TypeVarTuple"):  # 3.11+

    def _unpack_args(*args):
        """Flatten unpacked fixed-length tuples among ``args`` into a list.

        Arguments without unpacked tuple args, or whose unpacked args end
        in ``...``, are kept as they are.
        """
        flattened = []
        for arg in args:
            inner = getattr(arg, '__typing_unpacked_tuple_args__', None)
            if inner is None or (inner and inner[-1] is ...):
                flattened.append(arg)
            else:
                flattened.extend(inner)
        return flattened

    # Add default parameter - PEP 696
    class TypeVarTuple(metaclass=_TypeVarLikeMeta):
        """Type variable tuple."""

        _backported_typevarlike = typing.TypeVarTuple

        def __new__(cls, name, *, default=NoDefault):
            tvt = typing.TypeVarTuple(name)
            _set_default(tvt, default)
            _set_module(tvt)

            def _typevartuple_prepare_subst(alias, args):
                params = alias.__parameters__
                tvt_pos = params.index(tvt)
                for later_param in params[tvt_pos + 1:]:
                    if isinstance(later_param, TypeVarTuple):
                        raise TypeError(
                            f"More than one TypeVarTuple parameter in {alias}"
                        )

                alen = len(args)
                plen = len(params)
                # Counts of parameters before (left) and after (right) the
                # TypeVarTuple.
                left = tvt_pos
                right = plen - tvt_pos - 1
                var_tuple_index = None
                fillarg = None
                # Look for an unpacked two-element tuple ending in `...`
                # among the non-class arguments.
                for k, arg in enumerate(args):
                    if isinstance(arg, type):
                        continue
                    subargs = getattr(arg, '__typing_unpacked_tuple_args__', None)
                    if subargs and len(subargs) == 2 and subargs[-1] is ...:
                        if var_tuple_index is not None:
                            raise TypeError(
                                "More than one unpacked "
                                "arbitrary-length tuple argument"
                            )
                        var_tuple_index = k
                        fillarg = subargs[0]
                if var_tuple_index is not None:
                    left = min(left, var_tuple_index)
                    right = min(right, alen - var_tuple_index - 1)
                elif left + right > alen:
                    raise TypeError(f"Too few arguments for {alias};"
                                    f" actual {alen}, expected at least {plen - 1}")
                if left == alen - right and tvt.has_default():
                    replacement = _unpack_args(tvt.__default__)
                else:
                    replacement = args[left: alen - right]

                return (
                    *args[:left],
                    *([fillarg] * (tvt_pos - left)),
                    replacement,
                    *([fillarg] * (plen - right - left - tvt_pos - 1)),
                    *args[alen - right:],
                )

            tvt.__typing_prepare_subst__ = _typevartuple_prepare_subst
            return tvt

        def __init_subclass__(self, *args, **kwds):
            raise TypeError("Cannot subclass special typing classes")

else:  # <=3.10
    class TypeVarTuple(_DefaultMixin):
        """Type variable tuple.

        Usage::

            Ts = TypeVarTuple('Ts')

        In the same way that a normal type variable is a stand-in for a single
        type such as ``int``, a type variable *tuple* is a stand-in for a *tuple*
        type such as ``Tuple[int, str]``.

        Type variable tuples can be used in ``Generic`` declarations.
        Consider the following example::

            class Array(Generic[*Ts]): ...

        The ``Ts`` type variable tuple here behaves like ``tuple[T1, T2]``,
        where ``T1`` and ``T2`` are type variables. To use these type variables
        as type parameters of ``Array``, we must *unpack* the type variable tuple using
        the star operator: ``*Ts``. The signature of ``Array`` then behaves
        as if we had simply written ``class Array(Generic[T1, T2]): ...``.
        In contrast to ``Generic[T1, T2]``, however, ``Generic[*Shape]`` allows
        us to parameterise the class with an *arbitrary* number of type parameters.

        Type variable tuples can be used anywhere a normal ``TypeVar`` can.
        This includes class definitions, as shown above, as well as function
        signatures and variable annotations::

            class Array(Generic[*Ts]):

                def __init__(self, shape: Tuple[*Ts]):
                    self._shape: Tuple[*Ts] = shape

                def get_shape(self) -> Tuple[*Ts]:
                    return self._shape

            shape = (Height(480), Width(640))
            x: Array[Height, Width] = Array(shape)
            y = abs(x)  # Inferred type is Array[Height, Width]
            z = x + x   #        ... is Array[Height, Width]
            x.get_shape()  #     ... is tuple[Height, Width]

        """

        # Trick Generic __parameters__.
        __class__ = typing.TypeVar

        def __iter__(self):
            # Iterating yields the single ``Unpack[self]`` object.
            yield self.__unpacked__

        def __init__(self, name, *, default=NoDefault):
            self.__name__ = name
            _DefaultMixin.__init__(self, default)

            # for pickling:
            defining_module = _caller()
            if defining_module != 'typing_extensions':
                self.__module__ = defining_module

            self.__unpacked__ = Unpack[self]

        def __repr__(self):
            return self.__name__

        def __hash__(self):
            return object.__hash__(self)

        def __eq__(self, other):
            return self is other

        def __reduce__(self):
            return self.__name__

        def __init_subclass__(self, *args, **kwds):
            if '_root' not in kwds:
                raise TypeError("Cannot subclass special typing classes")
try:
    # 3.11+: reuse the standard-library implementation.
    reveal_type = typing.reveal_type
except AttributeError:
    # <=3.10: provide the backport.
    def reveal_type(obj: T, /) -> T:
        """Show the type of ``obj`` and hand ``obj`` back untouched.

        Static type checkers react to a ``reveal_type()`` call by reporting
        the type they inferred for the argument, e.g.::

            x: int = 1
            reveal_type(x)  # checker output: Revealed type is "builtins.int"

        At runtime the name of the argument's runtime type is written to
        ``sys.stderr`` and the argument is returned unchanged.
        """
        runtime_name = type(obj).__name__
        print(f"Runtime type is {runtime_name!r}", file=sys.stderr)
        return obj
# Longest repr shown in the assert_never() message. 3.11+ exposes the limit on
# ``typing``; older versions fall back to 100.
_ASSERT_NEVER_REPR_MAX_LENGTH = getattr(
    typing, "_ASSERT_NEVER_REPR_MAX_LENGTH", 100
)

try:
    # 3.11+
    assert_never = typing.assert_never
except AttributeError:
    # <=3.10
    def assert_never(arg: Never, /) -> Never:
        """Tell the type checker that this call can never be reached.

        Example::

            def int_or_str(arg: int | str) -> None:
                match arg:
                    case int():
                        print("It's an int")
                    case str():
                        print("It's a str")
                    case _:
                        assert_never(arg)

        A type checker reports an error when it considers the call
        reachable. At runtime the call always raises ``AssertionError``;
        the message contains ``repr(arg)``, cut off after
        ``_ASSERT_NEVER_REPR_MAX_LENGTH`` characters.
        """
        shown = repr(arg)
        if len(shown) > _ASSERT_NEVER_REPR_MAX_LENGTH:
            shown = shown[:_ASSERT_NEVER_REPR_MAX_LENGTH] + '...'
        raise AssertionError(f"Expected code to be unreachable, but got: {shown}")
if sys.version_info < (3, 12):  # <=3.11
    # dataclass_transform exists in 3.11 but lacks the frozen_default parameter,
    # so the backport is used there as well.
    def dataclass_transform(
        *,
        eq_default: bool = True,
        order_default: bool = False,
        kw_only_default: bool = False,
        frozen_default: bool = False,
        field_specifiers: typing.Tuple[
            typing.Union[typing.Type[typing.Any], typing.Callable[..., typing.Any]],
            ...
        ] = (),
        **kwargs: typing.Any,
    ) -> typing.Callable[[T], T]:
        """Mark a function, class, or metaclass as dataclass-like (PEP 681).

        The returned decorator can be applied to:

        - a decorator function::

              @dataclass_transform()
              def create_model(cls: type[_T]) -> type[_T]:
                  ...
                  return cls

        - a base class::

              @dataclass_transform()
              class ModelBase: ...

        - a metaclass::

              @dataclass_transform()
              class ModelMeta(type): ...

        Classes produced through any of these are treated by type checkers
        much like classes created with ``@dataclasses.dataclass`` (for
        example, an ``__init__`` is synthesized).

        Keyword arguments:

        - ``eq_default``, ``order_default``, ``kw_only_default``,
          ``frozen_default``: the value a type checker assumes for the
          ``eq`` / ``order`` / ``kw_only`` / ``frozen`` parameter when the
          caller omits it.
        - ``field_specifiers``: static tuple of classes or functions that
          describe fields, similar to ``dataclasses.field()``.
        - any further keyword arguments are collected as ``kwargs``.

        At runtime the decorator only stores these values in the
        ``__dataclass_transform__`` attribute of the decorated object and
        returns that object.
        """
        def decorator(cls_or_fn):
            # A fresh dict per decorated object, keyed exactly as PEP 681 names them.
            cls_or_fn.__dataclass_transform__ = dict(
                eq_default=eq_default,
                order_default=order_default,
                kw_only_default=kw_only_default,
                frozen_default=frozen_default,
                field_specifiers=field_specifiers,
                kwargs=kwargs,
            )
            return cls_or_fn
        return decorator
else:  # 3.12+
    dataclass_transform = typing.dataclass_transform
try:
    # 3.12+
    override = typing.override
except AttributeError:
    # <=3.11
    _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any])

    def override(arg: _F, /) -> _F:
        """Declare that a method overrides one from a base class (PEP 698).

        Usage:

            class Base:
                def method(self) -> None:
                    pass

            class Child(Base):
                @override
                def method(self) -> None:
                    super().method()

        Type checkers verify that a method of the same name exists on a base
        class, which catches base-class changes that were not mirrored in the
        child class.

        Nothing is checked at runtime. The decorator tries to set
        ``__override__ = True`` on the decorated object for introspection and
        returns the object unchanged.
        """
        try:
            setattr(arg, "__override__", True)
        except (AttributeError, TypeError):
            # Not writable: AttributeError for __slots__ / read-only
            # properties, TypeError for builtin classes. Skip silently.
            pass
        return arg
if hasattr(warnings, "deprecated"):
    deprecated = warnings.deprecated
else:
    _T = typing.TypeVar("_T")

    class deprecated:
        """Indicate that a class, function or overload is deprecated.

        When this decorator is applied to an object, the type checker
        will generate a diagnostic on usage of the deprecated object.

        Usage:

            @deprecated("Use B instead")
            class A:
                pass

            @deprecated("Use g instead")
            def f():
                pass

            @overload
            @deprecated("int support is deprecated")
            def g(x: int) -> int: ...
            @overload
            def g(x: str) -> int: ...

        The warning specified by *category* will be emitted at runtime
        on use of deprecated objects. For functions, that happens on calls;
        for classes, on instantiation and on creation of subclasses.
        If the *category* is ``None``, no warning is emitted at runtime.
        The *stacklevel* determines where the
        warning is emitted. If it is ``1`` (the default), the warning
        is emitted at the direct caller of the deprecated object; if it
        is higher, it is emitted further up the stack.
        Static type checker behavior is not affected by the *category*
        and *stacklevel* arguments.

        The deprecation message passed to the decorator is saved in the
        ``__deprecated__`` attribute on the decorated object.
        If applied to an overload, the decorator
        must be after the ``@overload`` decorator for the attribute to
        exist on the overload as returned by ``get_overloads()``.

        Coroutine functions stay coroutine functions: the wrapper returned
        for an ``async def`` is itself marked as a coroutine function.

        See PEP 702 for details.
        """

        def __init__(
            self,
            message: str,
            /,
            *,
            category: typing.Optional[typing.Type[Warning]] = DeprecationWarning,
            stacklevel: int = 1,
        ) -> None:
            """Store the decorator configuration.

            Raises ``TypeError`` when *message* is not a ``str``.
            """
            if not isinstance(message, str):
                raise TypeError(
                    "Expected an object of type str for 'message', not "
                    f"{type(message).__name__!r}"
                )
            self.message = message
            self.category = category
            self.stacklevel = stacklevel

        def __call__(self, arg: _T, /) -> _T:
            """Apply the deprecation to *arg*.

            - ``category is None``: only ``__deprecated__`` is set, *arg* is
              returned as is.
            - *arg* is a class: ``__new__`` and ``__init_subclass__`` are
              replaced by warning versions and the class itself is returned.
            - *arg* is any other callable: a warning wrapper is returned.
            - anything else: ``TypeError``.
            """
            # Make sure the inner functions created below don't
            # retain a reference to self.
            msg = self.message
            category = self.category
            stacklevel = self.stacklevel
            if category is None:
                arg.__deprecated__ = msg
                return arg
            elif isinstance(arg, type):
                import functools
                from types import MethodType

                original_new = arg.__new__

                @functools.wraps(original_new)
                def __new__(cls, *args, **kwargs):
                    # Warn only for direct instantiation, not for subclasses.
                    if cls is arg:
                        warnings.warn(msg, category=category, stacklevel=stacklevel + 1)
                    if original_new is not object.__new__:
                        return original_new(cls, *args, **kwargs)
                    # Mirrors a similar check in object.__new__.
                    elif cls.__init__ is object.__init__ and (args or kwargs):
                        raise TypeError(f"{cls.__name__}() takes no arguments")
                    else:
                        return original_new(cls)

                arg.__new__ = staticmethod(__new__)

                original_init_subclass = arg.__init_subclass__
                # We need slightly different behavior if __init_subclass__
                # is a bound method (likely if it was implemented in Python)
                if isinstance(original_init_subclass, MethodType):
                    original_init_subclass = original_init_subclass.__func__

                    @functools.wraps(original_init_subclass)
                    def __init_subclass__(*args, **kwargs):
                        warnings.warn(msg, category=category, stacklevel=stacklevel + 1)
                        return original_init_subclass(*args, **kwargs)

                    arg.__init_subclass__ = classmethod(__init_subclass__)
                # Or otherwise, which likely means it's a builtin such as
                # object's implementation of __init_subclass__.
                else:
                    @functools.wraps(original_init_subclass)
                    def __init_subclass__(*args, **kwargs):
                        warnings.warn(msg, category=category, stacklevel=stacklevel + 1)
                        return original_init_subclass(*args, **kwargs)

                    arg.__init_subclass__ = __init_subclass__

                arg.__deprecated__ = __new__.__deprecated__ = msg
                __init_subclass__.__deprecated__ = msg
                return arg
            elif callable(arg):
                import asyncio.coroutines
                import functools
                import inspect

                @functools.wraps(arg)
                def wrapper(*args, **kwargs):
                    warnings.warn(msg, category=category, stacklevel=stacklevel + 1)
                    return arg(*args, **kwargs)

                # functools.wraps() does not carry over "is a coroutine
                # function", so a plain wrapper around an ``async def`` would
                # make iscoroutinefunction() return False. Restore the marker.
                if asyncio.coroutines.iscoroutinefunction(arg):
                    if sys.version_info >= (3, 12):
                        wrapper = inspect.markcoroutinefunction(wrapper)
                    else:
                        # Pre-3.12 marker recognised by asyncio.iscoroutinefunction().
                        wrapper._is_coroutine = asyncio.coroutines._is_coroutine

                arg.__deprecated__ = wrapper.__deprecated__ = msg
                return wrapper
            else:
                raise TypeError(
                    "@deprecated decorator with non-None category must be applied to "
                    f"a class or callable, not {arg!r}"
                )
# We have to do some monkey patching to deal with the dual nature of
# Unpack/TypeVarTuple:
# - We want Unpack to be a kind of TypeVar so it gets accepted in
# Generic[Unpack[Ts]]
# - We want it to *not* be treated as a TypeVar for the purposes of
# counting generic parameters, so that when we subscript a generic,
# the runtime doesn't try to substitute the Unpack with the subscripted type.
# Two variants of _check_generic are defined, selected by whether the running
# ``typing`` module has TypeVarTuple. Whichever is chosen is installed over
# ``typing._check_generic`` at the end, unless _PEP_696_IMPLEMENTED is true.
if not hasattr(typing, "TypeVarTuple"):
# Variant used when typing lacks TypeVarTuple; ``elen`` is optional here and
# is derived from ``cls.__parameters__`` when left at the ``_marker`` sentinel.
def _check_generic(cls, parameters, elen=_marker):
"""Check correct count for parameters of a generic cls (internal helper).
This gives a nice error message in case of count mismatch.
"""
if not elen:
raise TypeError(f"{cls} is not a generic class")
if elen is _marker:
if not hasattr(cls, "__parameters__") or not cls.__parameters__:
raise TypeError(f"{cls} is not a generic class")
elen = len(cls.__parameters__)
alen = len(parameters)
if alen != elen:
expect_val = elen
if hasattr(cls, "__parameters__"):
# From here on ``parameters`` means the declared parameters of cls
# (minus Unpack items), no longer the arguments that were passed in.
parameters = [p for p in cls.__parameters__ if not _is_unpack(p)]
num_tv_tuples = sum(isinstance(p, TypeVarTuple) for p in parameters)
# A TypeVarTuple can absorb any surplus, and may also be empty.
if (num_tv_tuples > 0) and (alen >= elen - num_tv_tuples):
return
# deal with TypeVarLike defaults
# required TypeVarLikes cannot appear after a defaulted one.
if alen < elen:
# since we validate TypeVarLike default in _collect_type_vars
# or _collect_parameters we can safely check parameters[alen]
if (
getattr(parameters[alen], '__default__', NoDefault)
is not NoDefault
):
return
# Otherwise report the minimum count: total minus defaulted parameters.
num_default_tv = sum(getattr(p, '__default__', NoDefault)
is not NoDefault for p in parameters)
elen -= num_default_tv
expect_val = f"at least {elen}"
# The noun in the message depends on the interpreter version.
things = "arguments" if sys.version_info >= (3, 10) else "parameters"
raise TypeError(f"Too {'many' if alen > elen else 'few'} {things}"
f" for {cls}; actual {alen}, expected {expect_val}")
else:
# Python 3.11+
# Here ``elen`` is a required argument and there is no TypeVarTuple
# early-return in this variant.
def _check_generic(cls, parameters, elen):
"""Check correct count for parameters of a generic cls (internal helper).
This gives a nice error message in case of count mismatch.
"""
if not elen:
raise TypeError(f"{cls} is not a generic class")
alen = len(parameters)
if alen != elen:
expect_val = elen
if hasattr(cls, "__parameters__"):
# Rebinds ``parameters`` to the declared parameters of cls (minus Unpack).
parameters = [p for p in cls.__parameters__ if not _is_unpack(p)]
# deal with TypeVarLike defaults
# required TypeVarLikes cannot appear after a defaulted one.
if alen < elen:
# since we validate TypeVarLike default in _collect_type_vars
# or _collect_parameters we can safely check parameters[alen]
if (
getattr(parameters[alen], '__default__', NoDefault)
is not NoDefault
):
return
num_default_tv = sum(getattr(p, '__default__', NoDefault)
is not NoDefault for p in parameters)
elen -= num_default_tv
expect_val = f"at least {elen}"
raise TypeError(f"Too {'many' if alen > elen else 'few'} arguments"
f" for {cls}; actual {alen}, expected {expect_val}")
# Monkey-patch typing so that its own subscription code uses the check above.
if not _PEP_696_IMPLEMENTED:
typing._check_generic = _check_generic
# Reports whether the frame two levels above this function belongs to the
# ``typing`` module and has a local variable ``origin`` that is Generic or one
# of the two Protocol classes. Any failure to inspect the stack yields False.
# NOTE(review): the result depends on the exact call depth (``_getframe(2)``),
# so this must be called directly from the function that typing.py invokes.
def _has_generic_or_protocol_as_origin() -> bool:
try:
frame = sys._getframe(2)
# - Catch AttributeError: not all Python implementations have sys._getframe()
# - Catch ValueError: maybe we're called from an unexpected module
# and the call stack isn't deep enough
except (AttributeError, ValueError):
return False # err on the side of leniency
else:
# If we somehow get invoked from outside typing.py,
# also err on the side of leniency
if frame.f_globals.get("__name__") != "typing":
return False
origin = frame.f_locals.get("origin")
# Cannot use "in" because origin may be an object with a buggy __eq__ that
# throws an error.
return origin is typing.Generic or origin is Protocol or origin is typing.Protocol
# Classes accepted as "a TypeVarTuple": the one defined in this module and the
# one from ``typing`` (``None`` is stored when typing does not provide it).
_TYPEVARTUPLE_TYPES = {TypeVarTuple, getattr(typing, "TypeVarTuple", None)}


def _is_unpacked_typevartuple(x) -> bool:
    """Return True if ``x`` is ``Unpack[...]`` wrapping exactly one TypeVarTuple."""
    if get_origin(x) is not Unpack:
        return False
    unpacked = get_args(x)
    if len(unpacked) != 1:
        return False
    return type(unpacked[0]) in _TYPEVARTUPLE_TYPES
# Python 3.11+ _collect_type_vars was renamed to _collect_parameters
# Exactly one of the two collectors below is defined, matching the name the
# running ``typing`` module uses, and is then patched into ``typing``.
if hasattr(typing, '_collect_type_vars'):
def _collect_type_vars(types, typevar_types=None):
"""Collect all type variable contained in types in order of
first appearance (lexicographic order). For example::
_collect_type_vars((T, List[S, T])) == (T, S)
"""
if typevar_types is None:
typevar_types = typing.TypeVar
tvars = []
# A required TypeVarLike cannot appear after a TypeVarLike with a default
# if it was a direct call to `Generic[]` or `Protocol[]`
enforce_default_ordering = _has_generic_or_protocol_as_origin()
default_encountered = False
# Also, a TypeVarLike with a default cannot appear after a TypeVarTuple
type_var_tuple_encountered = False
for t in types:
if _is_unpacked_typevartuple(t):
type_var_tuple_encountered = True
elif isinstance(t, typevar_types) and t not in tvars:
if enforce_default_ordering:
has_default = getattr(t, '__default__', NoDefault) is not NoDefault
if has_default:
if type_var_tuple_encountered:
raise TypeError('Type parameter with a default'
' follows TypeVarTuple')
default_encountered = True
elif default_encountered:
raise TypeError(f'Type parameter {t!r} without a default'
' follows type parameter with a default')
tvars.append(t)
# Also pull in the parameters of nested generics, skipping duplicates.
if _should_collect_from_parameters(t):
tvars.extend([t for t in t.__parameters__ if t not in tvars])
return tuple(tvars)
# This variant is installed unconditionally.
typing._collect_type_vars = _collect_type_vars
else:
def _collect_parameters(args):
"""Collect all type variables and parameter specifications in args
in order of first appearance (lexicographic order).
For example::
assert _collect_parameters((T, Callable[P, T])) == (T, P)
"""
parameters = []
# A required TypeVarLike cannot appear after a TypeVarLike with default
# if it was a direct call to `Generic[]` or `Protocol[]`
enforce_default_ordering = _has_generic_or_protocol_as_origin()
default_encountered = False
# Also, a TypeVarLike with a default cannot appear after a TypeVarTuple
type_var_tuple_encountered = False
for t in args:
if isinstance(t, type):
# We don't want __parameters__ descriptor of a bare Python class.
pass
elif isinstance(t, tuple):
# `t` might be a tuple, when `ParamSpec` is substituted with
# `[T, int]`, or `[int, *Ts]`, etc.
for x in t:
for collected in _collect_parameters([x]):
if collected not in parameters:
parameters.append(collected)
# Objects exposing ``__typing_subst__`` are treated as type parameters.
elif hasattr(t, '__typing_subst__'):
if t not in parameters:
if enforce_default_ordering:
has_default = (
getattr(t, '__default__', NoDefault) is not NoDefault
)
if type_var_tuple_encountered and has_default:
raise TypeError('Type parameter with a default'
' follows TypeVarTuple')
if has_default:
default_encountered = True
elif default_encountered:
raise TypeError(f'Type parameter {t!r} without a default'
' follows type parameter with a default')
parameters.append(t)
else:
# Anything else: remember an unpacked TypeVarTuple and collect
# whatever ``__parameters__`` the object carries.
if _is_unpacked_typevartuple(t):
type_var_tuple_encountered = True
for x in getattr(t, '__parameters__', ()):
if x not in parameters:
parameters.append(x)
return tuple(parameters)
# This variant is installed only when _PEP_696_IMPLEMENTED is false.
if not _PEP_696_IMPLEMENTED:
typing._collect_parameters = _collect_parameters
# Backport typing.NamedTuple as it exists in Python 3.13.
# In 3.11, the ability to define generic `NamedTuple`s was supported.
# This was explicitly disallowed in 3.9-3.10, and only half-worked in <=3.8.
# On 3.12, we added __orig_bases__ to call-based NamedTuples
# On 3.13, we deprecated kwargs-based NamedTuples
if sys.version_info >= (3, 13):
NamedTuple = typing.NamedTuple
else:
def _make_nmtuple(name, types, module, defaults=()):
    """Create a ``collections.namedtuple`` class carrying type annotations.

    ``types`` is an iterable of ``(field_name, field_type)`` pairs; it is
    walked twice, first for the names and then for the validated types.
    Every type goes through ``typing._type_check``. The annotations mapping
    is attached both to the class and to its ``__new__``.
    """
    field_names = []
    for field, _tp in types:
        field_names.append(field)

    hints = {}
    for field, tp in types:
        hints[field] = typing._type_check(
            tp, f"field {field} annotation must be a type"
        )

    tuple_cls = collections.namedtuple(
        name, field_names, defaults=defaults, module=module
    )
    tuple_cls.__annotations__ = hints
    tuple_cls.__new__.__annotations__ = hints
    # The `_field_types` attribute was removed in 3.9;
    # in earlier versions, it is the same as the `__annotations__` attribute
    if sys.version_info < (3, 9):
        tuple_cls._field_types = hints
    return tuple_cls
# Attribute names a NamedTuple body may not define (taken from typing) ...
_prohibited_namedtuple_fields = typing._prohibited
# ... and names from the class namespace that are never copied onto the result.
_special_namedtuple_fields = frozenset({'__module__', '__name__', '__annotations__'})
# Metaclass behind the class-based ``class X(NamedTuple): ...`` syntax: its
# __new__ returns a namedtuple class built by _make_nmtuple.
class _NamedTupleMeta(type):
def __new__(cls, typename, bases, ns):
assert _NamedTuple in bases
# Only _NamedTuple itself and typing.Generic are accepted as bases.
for base in bases:
if base is not _NamedTuple and base is not typing.Generic:
raise TypeError(
'can only inherit from a NamedTuple type and Generic')
# The placeholder base is swapped for the real ``tuple``.
bases = tuple(tuple if base is _NamedTuple else base for base in bases)
# Field types come from ``__annotations__`` or, failing that, from
# calling ``__annotate__`` with format 1.
if "__annotations__" in ns:
types = ns["__annotations__"]
elif "__annotate__" in ns:
# TODO: Use inspect.VALUE here, and make the annotations lazily evaluated
types = ns["__annotate__"](1)
else:
types = {}
# A field has a default when the class body assigns it a value; once one
# default is seen, every later field must have one too.
default_names = []
for field_name in types:
if field_name in ns:
default_names.append(field_name)
elif default_names:
raise TypeError(f"Non-default namedtuple field {field_name} "
f"cannot follow default field"
f"{'s' if len(default_names) > 1 else ''} "
f"{', '.join(default_names)}")
nm_tpl = _make_nmtuple(
typename, types.items(),
defaults=[ns[n] for n in default_names],
module=ns['__module__']
)
nm_tpl.__bases__ = bases
# Generic NamedTuples get typing's subscription machinery attached.
if typing.Generic in bases:
if hasattr(typing, '_generic_class_getitem'): # 3.12+
nm_tpl.__class_getitem__ = classmethod(typing._generic_class_getitem)
else:
class_getitem = typing.Generic.__class_getitem__.__func__
nm_tpl.__class_getitem__ = classmethod(class_getitem)
# update from user namespace without overriding special namedtuple attributes
for key, val in ns.items():
if key in _prohibited_namedtuple_fields:
raise AttributeError("Cannot overwrite NamedTuple attribute " + key)
elif key not in _special_namedtuple_fields:
# Field names are skipped here: their class-body values were already
# consumed as defaults above.
if key not in nm_tpl._fields:
setattr(nm_tpl, key, ns[key])
# Call ``__set_name__`` by hand for every copied-or-field value
# whose type defines it.
try:
set_name = type(val).__set_name__
except AttributeError:
pass
else:
try:
set_name(val, nm_tpl, key)
except BaseException as e:
msg = (
f"Error calling __set_name__ on {type(val).__name__!r} "
f"instance {key!r} in {typename!r}"
)
# BaseException.add_note() existed on py311,
# but the __set_name__ machinery didn't start
# using add_note() until py312.
# Making sure exceptions are raised in the same way
# as in "normal" classes seems most important here.
if sys.version_info >= (3, 12):
e.add_note(msg)
raise
else:
raise RuntimeError(msg) from e
if typing.Generic in bases:
nm_tpl.__init_subclass__()
return nm_tpl
# Placeholder base class, created with type.__new__ so that the metaclass
# __new__ above (and its assertion) is bypassed.
_NamedTuple = type.__new__(_NamedTupleMeta, 'NamedTuple', (), {})
# ``__mro_entries__`` implementation passed to _ensure_subclassable below: when
# ``NamedTuple`` appears in a class's bases it is replaced by ``_NamedTuple``.
def _namedtuple_mro_entries(bases):
assert NamedTuple in bases
return (_NamedTuple,)
@_ensure_subclassable(_namedtuple_mro_entries)
def NamedTuple(typename, fields=_marker, /, **kwargs):
"""Typed version of namedtuple.
Usage::
class Employee(NamedTuple):
name: str
id: int
This is equivalent to::
Employee = collections.namedtuple('Employee', ['name', 'id'])
The resulting class has an extra __annotations__ attribute, giving a
dict that maps field names to types. (The field names are also in
the _fields attribute, which is part of the namedtuple API.)
An alternative equivalent functional syntax is also accepted::
Employee = NamedTuple('Employee', [('name', str), ('id', int)])
"""
# Case 1: ``fields`` omitted. Both the keyword-argument form and the
# no-fields form still work but are deprecated; pick the matching message.
if fields is _marker:
if kwargs:
deprecated_thing = "Creating NamedTuple classes using keyword arguments"
deprecation_msg = (
"{name} is deprecated and will be disallowed in Python {remove}. "
"Use the class-based or functional syntax instead."
)
else:
deprecated_thing = "Failing to pass a value for the 'fields' parameter"
example = f"`{typename} = NamedTuple({typename!r}, [])`"
deprecation_msg = (
"{name} is deprecated and will be disallowed in Python {remove}. "
"To create a NamedTuple class with 0 fields "
"using the functional syntax, "
"pass an empty list, e.g. "
) + example + "."
# Case 2: ``fields=None``. An error together with kwargs, otherwise deprecated.
elif fields is None:
if kwargs:
raise TypeError(
"Cannot pass `None` as the 'fields' parameter "
"and also specify fields using keyword arguments"
)
else:
deprecated_thing = "Passing `None` as the 'fields' parameter"
example = f"`{typename} = NamedTuple({typename!r}, [])`"
deprecation_msg = (
"{name} is deprecated and will be disallowed in Python {remove}. "
"To create a NamedTuple class with 0 fields "
"using the functional syntax, "
"pass an empty list, e.g. "
) + example + "."
# Case 3: a real ``fields`` value combined with kwargs is rejected.
elif kwargs:
raise TypeError("Either list of fields or keywords"
" can be provided to NamedTuple, not both")
# In the two deprecated cases, warn and take the fields from the kwargs
# (which may be empty).
if fields is _marker or fields is None:
warnings.warn(
deprecation_msg.format(name=deprecated_thing, remove="3.15"),
DeprecationWarning,
stacklevel=2,
)
fields = kwargs.items()
# NOTE: _caller() is defined elsewhere and is called directly from this
# function; presumably it reports the module of the code calling NamedTuple.
nt = _make_nmtuple(typename, fields, module=_caller())
nt.__orig_bases__ = (NamedTuple,)
return nt
try:
    Buffer = collections.abc.Buffer
except AttributeError:
    class Buffer(abc.ABC):  # noqa: B024
        """ABC standing in for "implements the buffer protocol".

        Python objects can expose a low-level memory buffer through the
        buffer protocol. Before Python 3.12 pure-Python code can neither
        implement that protocol nor test whether a class implements it.
        From 3.12 on, the ``__buffer__`` method gives Python code access
        to the protocol and ``collections.abc.Buffer`` can be used for
        the check.

        On earlier versions, declare buffer support by inheriting from
        this ABC (in a stub file or at runtime) or by registering a class
        with it. The ABC defines no methods because pre-3.12 buffer
        classes share no Python-accessible ones; it is mainly useful for
        static checks.
        """

    # As a courtesy, register the most common stdlib buffer classes.
    for _buffer_type in (memoryview, bytearray, bytes):
        Buffer.register(_buffer_type)
    del _buffer_type
# Backport of types.get_original_bases, available on 3.12+ in CPython
try:
    get_original_bases = _types.get_original_bases
except AttributeError:
    def get_original_bases(cls, /):
        """Return the bases of ``cls`` as written, before ``__mro_entries__`` ran.

        The value of ``__orig_bases__`` stored on the class itself is
        returned when present, otherwise ``cls.__bases__``.

        Examples::

            from typing import TypeVar, Generic
            from metaflow._vendor.typing_extensions import NamedTuple, TypedDict

            T = TypeVar("T")
            class Foo(Generic[T]): ...
            class Bar(Foo[int], float): ...
            class Baz(list[str]): ...
            Eggs = NamedTuple("Eggs", [("a", int), ("b", str)])
            Spam = TypedDict("Spam", {"a": int, "b": str})

            assert get_original_bases(Bar) == (Foo[int], float)
            assert get_original_bases(Baz) == (list[str],)
            assert get_original_bases(Eggs) == (NamedTuple,)
            assert get_original_bases(Spam) == (TypedDict,)
            assert get_original_bases(int) == (object,)

        Raises ``TypeError`` when ``cls`` lacks ``__dict__`` or ``__bases__``,
        i.e. when it is not a class.
        """
        try:
            namespace, declared_bases = cls.__dict__, cls.__bases__
            return namespace.get("__orig_bases__", declared_bases)
        except AttributeError:
            raise TypeError(
                f'Expected an instance of type, not {type(cls).__name__!r}'
            ) from None
# NewType is a class on Python 3.10+, making it pickleable
# The error message for subclassing instances of NewType was improved on 3.11+
if sys.version_info < (3, 11):
    class NewType:
        """Create a distinct type for static checkers at near-zero runtime cost.

        Static type checkers treat ``NewType(name, tp)`` as a subtype of
        ``tp``. At runtime the result is a callable that returns its
        argument unchanged. Usage::

            UserId = NewType('UserId', int)

            def name_by_id(user_id: UserId) -> str:
                ...

            UserId('user')          # Fails type check
            name_by_id(42)          # Fails type check
            name_by_id(UserId(42))  # OK
            num = UserId(5) + 1     # type: int
        """

        def __init__(self, name, tp):
            # The full (possibly dotted) name is the qualname; the part after
            # the last dot is the plain name.
            self.__qualname__ = name
            short_name = name.rpartition('.')[-1] if '.' in name else name
            self.__name__ = short_name
            self.__supertype__ = tp
            def_mod = _caller()
            if def_mod != 'typing_extensions':
                self.__module__ = def_mod

        def __call__(self, obj, /):
            return obj

        def __repr__(self):
            return f'{self.__module__}.{self.__qualname__}'

        def __reduce__(self):
            return self.__qualname__

        def __mro_entries__(self, bases):
            # We defined __mro_entries__ to get a better error message
            # if a user attempts to subclass a NewType instance. bpo-46170
            wrapped_name = self.__name__

            class Dummy:
                def __init_subclass__(cls):
                    derived_name = cls.__name__
                    raise TypeError(
                        f"Cannot subclass an instance of NewType. "
                        f"Perhaps you were looking for: "
                        f"`{derived_name} = NewType({derived_name!r}, {wrapped_name})`"
                    )

            return (Dummy,)

        if sys.version_info >= (3, 10):
            # PEP 604 methods
            # It doesn't make sense to have these methods on Python <3.10

            def __or__(self, other):
                return typing.Union[self, other]

            def __ror__(self, other):
                return typing.Union[other, self]
else:
    NewType = typing.NewType
# Use typing.TypeAliasType when the running Python provides it; otherwise
# define the backport below.
if hasattr(typing, "TypeAliasType"):
TypeAliasType = typing.TypeAliasType
else:
# Used by TypeAliasType.__or__/__ror__ (defined only on 3.10+) to decide
# whether the other operand may take part in a ``|`` union.
def _is_unionable(obj):
"""Corresponds to is_unionable() in unionobject.c in CPython."""
return obj is None or isinstance(obj, (
type,
_types.GenericAlias,
_types.UnionType,
TypeAliasType,
))
class TypeAliasType:
"""Create named, parameterized type aliases.
This provides a backport of the new `type` statement in Python 3.12:
type ListOrSet[T] = list[T] | set[T]
is equivalent to:
T = TypeVar("T")
ListOrSet = TypeAliasType("ListOrSet", list[T] | set[T], type_params=(T,))
The name ListOrSet can then be used as an alias for the type it refers to.
The type_params argument should contain all the type parameters used
in the value of the type alias. If the alias is not generic, this
argument is omitted.
Static type checkers should only support type aliases declared using
TypeAliasType that follow these rules:
- The first argument (the name) must be a string literal.
- The TypeAliasType instance must be immediately assigned to a variable
of the same name. (For example, 'X = TypeAliasType("Y", int)' is invalid,
as is 'X, Y = TypeAliasType("X", int), TypeAliasType("Y", int)').
"""
# Attribute order matters in __init__: __setattr__ refuses every assignment
# once ``__name__`` exists, so ``__name__`` has to be assigned last.
def __init__(self, name: str, value, *, type_params=()):
if not isinstance(name, str):
raise TypeError("TypeAliasType name must be a string")
self.__value__ = value
self.__type_params__ = type_params
# ``__parameters__`` mirrors ``type_params`` except that a TypeVarTuple
# contributes the items it yields when iterated instead of itself.
parameters = []
for type_param in type_params:
if isinstance(type_param, TypeVarTuple):
parameters.extend(type_param)
else:
parameters.append(type_param)
self.__parameters__ = tuple(parameters)
# NOTE: _caller() is defined elsewhere and must be called directly from
# here; presumably it reports the module of the code creating the alias.
def_mod = _caller()
if def_mod != 'typing_extensions':
self.__module__ = def_mod
# Setting this attribute closes the TypeAliasType from further modification
self.__name__ = name
def __setattr__(self, name: str, value: object, /) -> None:
# Writable only while ``__name__`` has not been set yet.
if hasattr(self, "__name__"):
self._raise_attribute_error(name)
super().__setattr__(name, value)
def __delattr__(self, name: str, /) -> Never:
# Deleting attributes is never allowed.
self._raise_attribute_error(name)
# Always raises AttributeError; the wording depends on which attribute was
# targeted.
def _raise_attribute_error(self, name: str) -> Never:
# Match the Python 3.12 error messages exactly
if name == "__name__":
raise AttributeError("readonly attribute")
elif name in {"__value__", "__type_params__", "__parameters__", "__module__"}:
raise AttributeError(
f"attribute '{name}' of 'typing.TypeAliasType' objects "
"is not writable"
)
else:
raise AttributeError(
f"'typing.TypeAliasType' object has no attribute '{name}'"
)
def __repr__(self) -> str:
return self.__name__
# Subscription: each item is validated with typing._type_check and the
# result is a typing._GenericAlias whose origin is this alias.
def __getitem__(self, parameters):
if not isinstance(parameters, tuple):
parameters = (parameters,)
parameters = [
typing._type_check(
item, f'Subscripting {self.__name__} requires a type.'
)
for item in parameters
]
return typing._GenericAlias(self, tuple(parameters))
# Returning a string makes pickle store the alias as a global of that name.
def __reduce__(self):
return self.__name__
def __init_subclass__(cls, *args, **kwargs):
raise TypeError(
"type 'typing_extensions.TypeAliasType' is not an acceptable base type"
)
# The presence of this method convinces typing._type_check
# that TypeAliasTypes are types.
def __call__(self):
raise TypeError("Type alias is not callable")
if sys.version_info >= (3, 10):
def __or__(self, right):
# For forward compatibility with 3.12, reject Unions
# that are not accepted by the built-in Union.
if not _is_unionable(right):
return NotImplemented
return typing.Union[self, right]
def __ror__(self, left):
if not _is_unionable(left):
return NotImplemented
return typing.Union[left, self]
if not hasattr(typing, "is_protocol"):
    def is_protocol(tp: type, /) -> bool:
        """Tell whether ``tp`` is a Protocol class.

        ``Protocol`` itself does not count.

        Example::

            >>> from typing_extensions import Protocol, is_protocol
            >>> class P(Protocol):
            ...     def a(self) -> str: ...
            ...     b: int
            >>> is_protocol(P)
            True
            >>> is_protocol(int)
            False
        """
        if not isinstance(tp, type):
            return False
        return (
            getattr(tp, '_is_protocol', False)
            and tp is not Protocol
            and tp is not typing.Protocol
        )

    def get_protocol_members(tp: type, /) -> typing.FrozenSet[str]:
        """Return the names of the members a Protocol class declares.

        Example::

            >>> from typing_extensions import Protocol, get_protocol_members
            >>> class P(Protocol):
            ...     def a(self) -> str: ...
            ...     b: int
            >>> get_protocol_members(P)
            frozenset({'a', 'b'})

        A ``TypeError`` is raised when ``tp`` is not a Protocol.
        """
        if not is_protocol(tp):
            raise TypeError(f'{tp!r} is not a Protocol')
        # Prefer the member list stored on the class; compute it otherwise.
        try:
            member_names = tp.__protocol_attrs__
        except AttributeError:
            member_names = _get_protocol_attrs(tp)
        return frozenset(member_names)
else:
    is_protocol = typing.is_protocol
    get_protocol_members = typing.get_protocol_members
try:
    Doc = typing.Doc
except AttributeError:
    class Doc:
        """Attach documentation to a type annotation through ``Annotated``.

        Intended for class attributes, function and method parameters,
        return values, and variables, as a complement to docstrings.

        Pass the text as a positional-only string literal so that static
        tools such as editors and documentation generators can read it.
        The text is stored in the ``documentation`` attribute.

        Example::

            >>> from typing_extensions import Annotated, Doc
            >>> def hi(to: Annotated[str, Doc("Who to say hi to")]) -> None: ...
        """

        def __init__(self, documentation: str, /) -> None:
            self.documentation = documentation

        def __eq__(self, other: object) -> bool:
            # Two Doc objects are equal when their texts are equal.
            if isinstance(other, Doc):
                return self.documentation == other.documentation
            return NotImplemented

        def __hash__(self) -> int:
            return hash(self.documentation)

        def __repr__(self) -> str:
            return "Doc({!r})".format(self.documentation)
# Find the capsule type: first on ``_types``, then, as a fallback, by taking
# the type of the ``CAPI`` object exposed by the ``_socket`` extension module.
try:
    _CapsuleType = _types.CapsuleType
except AttributeError:
    _CapsuleType = None

if _CapsuleType is None:
    try:
        import _socket
    except ImportError:
        # No ``_socket`` module: the type stays unresolved.
        pass
    else:
        _CAPI = getattr(_socket, "CAPI", None)
        if _CAPI is not None:
            _CapsuleType = type(_CAPI)

# Only export ``CapsuleType`` when one of the lookups above succeeded.
if _CapsuleType is not None:
    CapsuleType = _CapsuleType
    __all__.append("CapsuleType")
# Aliases for items that have always been in typing.
# These are assigned explicitly (rather than via `from typing import *` at the
# top) so that CI fails loudly if one of them is ever deleted from typing.py
# in a future version of Python.
AbstractSet = typing.AbstractSet
AnyStr = typing.AnyStr
BinaryIO = typing.BinaryIO
Callable = typing.Callable
Collection = typing.Collection
Container = typing.Container
Dict = typing.Dict
ForwardRef = typing.ForwardRef
FrozenSet = typing.FrozenSet
Generic = typing.Generic
Hashable = typing.Hashable
IO = typing.IO
ItemsView = typing.ItemsView
Iterable = typing.Iterable
Iterator = typing.Iterator
KeysView = typing.KeysView
List = typing.List
Mapping = typing.Mapping
MappingView = typing.MappingView
Match = typing.Match
MutableMapping = typing.MutableMapping
MutableSequence = typing.MutableSequence
MutableSet = typing.MutableSet
Optional = typing.Optional
Pattern = typing.Pattern
Reversible = typing.Reversible
Sequence = typing.Sequence
Set = typing.Set
Sized = typing.Sized
TextIO = typing.TextIO
Tuple = typing.Tuple
Union = typing.Union
ValuesView = typing.ValuesView
cast = typing.cast
no_type_check = typing.no_type_check
no_type_check_decorator = typing.no_type_check_decorator
================================================
FILE: metaflow/_vendor/v3_6/__init__.py
================================================
# Empty file
================================================
FILE: metaflow/_vendor/v3_6/importlib_metadata/__init__.py
================================================
import os
import re
import abc
import csv
import sys
from metaflow._vendor.v3_6 import zipp
import email
import pathlib
import operator
import textwrap
import warnings
import functools
import itertools
import posixpath
import collections
from . import _adapters, _meta
from ._collections import FreezableDefaultDict, Pair
from ._compat import (
NullFinder,
install,
pypy_partial,
)
from ._functools import method_cache, pass_none
from ._itertools import always_iterable, unique_everseen
from ._meta import PackageMetadata, SimplePath
from contextlib import suppress
from importlib import import_module
from importlib.abc import MetaPathFinder
from itertools import starmap
from typing import List, Mapping, Optional, Union
# Names exported by ``from <this module> import *``: the public classes and
# the module-level query helpers defined below.
__all__ = [
'Distribution',
'DistributionFinder',
'PackageMetadata',
'PackageNotFoundError',
'distribution',
'distributions',
'entry_points',
'files',
'metadata',
'packages_distributions',
'requires',
'version',
]
class PackageNotFoundError(ModuleNotFoundError):
    """Raised when no metadata can be located for a requested package."""

    @property
    def name(self):
        # The exception is constructed with exactly one argument: the name.
        [requested] = self.args
        return requested

    def __str__(self):
        return "No package metadata was found for {}".format(self.name)
class Sectioned:
    """
    Minimal, fast parser for ``[section]``-style entry point text.

    >>> for item in Sectioned.read(Sectioned._sample):
    ...     print(item)
    Pair(name='sec1', value='# comments ignored')
    Pair(name='sec1', value='a = 1')
    Pair(name='sec1', value='b = 2')
    Pair(name='sec2', value='a = 2')

    >>> res = Sectioned.section_pairs(Sectioned._sample)
    >>> item = next(res)
    >>> item.name
    'sec1'
    >>> item.value
    Pair(name='a', value='1')
    >>> item = next(res)
    >>> item.value
    Pair(name='b', value='2')
    >>> item = next(res)
    >>> item.name
    'sec2'
    >>> item.value
    Pair(name='a', value='2')
    >>> list(res)
    []
    """

    # Sample input used by the doctests above.
    _sample = textwrap.dedent(
        """
        [sec1]
        # comments ignored
        a = 1
        b = 2
        [sec2]
        a = 2
        """
    ).lstrip()

    @classmethod
    def section_pairs(cls, text):
        """Lazily yield ``Pair(section, Pair(key, value))`` for each valid line.

        Comment and empty lines are dropped, and so are lines that appear
        before the first section header.
        """
        return (
            entry._replace(value=Pair.parse(entry.value))
            for entry in cls.read(text, filter_=cls.valid)
            if entry.name is not None
        )

    @staticmethod
    def read(text, filter_=None):
        """Yield ``Pair(section_name, line)`` for each kept, stripped line.

        ``filter_`` is handed to the built-in ``filter``; ``None`` therefore
        drops only empty lines.
        """
        current = None
        for line in filter(filter_, map(str.strip, text.splitlines())):
            if line.startswith('[') and line.endswith(']'):
                # A section header: remember it and emit nothing.
                current = line.strip('[]')
            else:
                yield Pair(current, line)

    @staticmethod
    def valid(line):
        """Return a truthy value for non-empty lines that are not comments."""
        return line and not line.startswith('#')
class DeprecatedTuple:
    """
    Mixin that keeps subscript access working, with a deprecation warning.

    Subclasses must provide ``_key()`` returning the tuple to index into.

    >>> recwarn = getfixture('recwarn')
    >>> ep = EntryPoint(name='name', value='value', group='group')
    >>> ep[:]
    ('name', 'value', 'group')
    >>> ep[0]
    'name'
    >>> len(recwarn)
    1
    """

    # Pre-bound warning call; a ``functools.partial`` is not a descriptor, so
    # ``self._warn()`` invokes it without passing ``self``.
    _warn = functools.partial(
        warnings.warn,
        "EntryPoint tuple interface is deprecated. Access members by name.",
        DeprecationWarning,
        stacklevel=pypy_partial(2),
    )

    def __getitem__(self, item):
        self._warn()
        fields = self._key()
        return fields[item]
class EntryPoint(DeprecatedTuple):
    """An entry point as defined by Python packaging conventions.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.

    Instances are immutable: attributes are written through ``vars(self)``
    and ``__setattr__`` always raises.
    """

    # The group names (module / attr / extras) are required: every accessor
    # below reads them through ``match.group(<name>)``. Without the names
    # ``(?P[...]`` is not valid regex syntax and ``re.compile`` raises.
    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+))?\s*'
        r'(?P<extras>\[.*\])?\s*$'
    )
    # A regular expression describing the syntax for an entry point,
    # which might look like:
    #
    #     - module
    #     - package.module
    #     - package.module:attribute
    #     - package.module:object.attribute
    #     - package.module:attr [extra1, extra2]
    #
    # Other combinations are possible as well.
    #
    # The expression is lenient about whitespace around the ':',
    # following the attr, and following any extras.

    dist: Optional['Distribution'] = None

    def __init__(self, name, value, group):
        # Bypass the immutability guard in __setattr__.
        vars(self).update(name=name, value=value, group=group)

    def load(self):
        """Load the entry point from its definition. If only a module
        is indicated by the value, return that module. Otherwise,
        return the named object.
        """
        match = self.pattern.match(self.value)
        module = import_module(match.group('module'))
        # Walk the dotted attribute path (if any) starting from the module.
        attrs = filter(None, (match.group('attr') or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def module(self):
        """The module part of the entry point value."""
        match = self.pattern.match(self.value)
        return match.group('module')

    @property
    def attr(self):
        """The attribute part of the entry point value, or ``None``."""
        match = self.pattern.match(self.value)
        return match.group('attr')

    @property
    def extras(self):
        """List of ``\\w+`` matches found in the bracketed extras section.

        NOTE(review): the elements are ``re.Match`` objects (the result of
        ``re.finditer``), not plain strings - confirm this is what callers
        expect before relying on it.
        """
        match = self.pattern.match(self.value)
        return list(re.finditer(r'\w+', match.group('extras') or ''))

    def _for(self, dist):
        """Record the owning distribution and return ``self``."""
        vars(self).update(dist=dist)
        return self

    def __iter__(self):
        """
        Supply iter so one may construct dicts of EntryPoints by name.
        """
        msg = (
            "Construction of dict of EntryPoints is deprecated in "
            "favor of EntryPoints."
        )
        warnings.warn(msg, DeprecationWarning)
        return iter((self.name, self))

    def matches(self, **params):
        """Return True when every ``param=value`` equals the same attribute."""
        attrs = (getattr(self, param) for param in params)
        return all(map(operator.eq, params.values(), attrs))

    def _key(self):
        """Tuple used for ordering, equality, hashing and tuple-style access."""
        return self.name, self.value, self.group

    def __lt__(self, other):
        return self._key() < other._key()

    def __eq__(self, other):
        return self._key() == other._key()

    def __setattr__(self, name, value):
        raise AttributeError("EntryPoint objects are immutable.")

    def __repr__(self):
        return (
            f'EntryPoint(name={self.name!r}, value={self.value!r}, '
            f'group={self.group!r})'
        )

    def __hash__(self):
        return hash(self._key())
class DeprecatedList(list):
    """
    A ``list`` subclass whose mutating operations still work, for
    compatibility, but emit a deprecation warning.

    >>> recwarn = getfixture('recwarn')
    >>> dl = DeprecatedList(range(3))
    >>> dl[0] = 1
    >>> dl.append(3)
    >>> del dl[3]
    >>> dl.reverse()
    >>> dl.sort()
    >>> dl.extend([4])
    >>> dl.pop(-1)
    4
    >>> dl.remove(1)
    >>> dl += [5]
    >>> dl + [6]
    [1, 2, 5, 6]
    >>> dl + (6,)
    [1, 2, 5, 6]
    >>> dl.insert(0, 0)
    >>> dl
    [0, 1, 2, 5]
    >>> dl == [0, 1, 2, 5]
    True
    >>> dl == (0, 1, 2, 5)
    True
    >>> len(recwarn)
    1
    """

    # Pre-bound warning call shared by every deprecated operation.
    _warn = functools.partial(
        warnings.warn,
        "EntryPoints list interface is deprecated. Cast to list if needed.",
        DeprecationWarning,
        stacklevel=pypy_partial(2),
    )

    def _wrap_deprecated_method(method_name: str):  # type: ignore
        # Runs while the class body executes: builds a method that warns and
        # then delegates to the ``list`` implementation of the same name.
        def wrapped(self, *args, **kwargs):
            self._warn()
            return getattr(super(), method_name)(*args, **kwargs)

        return wrapped

    # Install a warning wrapper for each mutating list method by writing
    # straight into the class namespace.
    for method_name in (
        '__setitem__',
        '__delitem__',
        'append',
        'reverse',
        'extend',
        'pop',
        'remove',
        '__iadd__',
        'insert',
        'sort',
    ):
        locals()[method_name] = _wrap_deprecated_method(method_name)

    def __add__(self, other):
        # Concatenating with a tuple is the non-deprecated spelling; any
        # other operand warns and is converted first.
        if not isinstance(other, tuple):
            self._warn()
            other = tuple(other)
        combined = tuple(self) + other
        return self.__class__(combined)

    def __eq__(self, other):
        # Same policy as __add__: tuples compare silently, anything else warns.
        if not isinstance(other, tuple):
            self._warn()
            other = tuple(other)
        return tuple(self).__eq__(other)
class EntryPoints(DeprecatedList):
    """
    An immutable collection of selectable EntryPoint objects.
    """

    __slots__ = ()

    def __getitem__(self, name):  # -> EntryPoint:
        """
        Return the first entry point in self whose name equals ``name``.

        Integer subscripts are still honoured (with a deprecation warning).
        A name that matches nothing raises ``KeyError``.
        """
        if isinstance(name, int):
            warnings.warn(
                "Accessing entry points by index is deprecated. "
                "Cast to tuple if needed.",
                DeprecationWarning,
                stacklevel=2,
            )
            return super().__getitem__(name)
        candidates = iter(self.select(name=name))
        try:
            return next(candidates)
        except StopIteration:
            raise KeyError(name)

    def select(self, **params):
        """
        Return a new EntryPoints holding only the members that match all the
        given parameters (typically group and/or name).
        """
        return EntryPoints(ep for ep in self if ep.matches(**params))

    @property
    def names(self):
        """
        Return the set of all names of all entry points.
        """
        found = set()
        for ep in self:
            found.add(ep.name)
        return found

    @property
    def groups(self):
        """
        Return the set of all groups of all entry points.

        For coverage while SelectableGroups is present.
        >>> EntryPoints().groups
        set()
        """
        found = set()
        for ep in self:
            found.add(ep.group)
        return found

    @classmethod
    def _from_text_for(cls, text, dist):
        """Parse ``text`` and attach ``dist`` to every resulting entry point."""
        return cls(ep._for(dist) for ep in cls._from_text(text))

    @staticmethod
    def _from_text(text):
        """Lazily build EntryPoint objects from entry_points.txt-style text."""
        return (
            EntryPoint(name=item.value.name, value=item.value.value, group=item.name)
            for item in Sectioned.section_pairs(text or '')
        )
class Deprecated:
    """
    Mixin for a mapping class that flags use of the mapping interface as
    deprecated: each wrapped accessor warns, then defers to the next class
    in the MRO.

    >>> recwarn = getfixture('recwarn')
    >>> class DeprecatedDict(Deprecated, dict): pass
    >>> dd = DeprecatedDict(foo='bar')
    >>> dd.get('baz', None)
    >>> dd['foo']
    'bar'
    >>> list(dd)
    ['foo']
    >>> list(dd.keys())
    ['foo']
    >>> 'foo' in dd
    True
    >>> list(dd.values())
    ['bar']
    >>> len(recwarn)
    1
    """

    # Pre-bound warning call shared by all accessors below.
    _warn = functools.partial(
        warnings.warn,
        "SelectableGroups dict interface is deprecated. Use select.",
        DeprecationWarning,
        stacklevel=pypy_partial(2),
    )

    def __getitem__(self, name):
        self._warn()
        return super().__getitem__(name)

    def __contains__(self, *args):
        self._warn()
        return super().__contains__(*args)

    def __iter__(self):
        self._warn()
        return super().__iter__()

    def get(self, name, default=None):
        self._warn()
        return super().get(name, default)

    def keys(self):
        self._warn()
        return super().keys()

    def values(self):
        self._warn()
        return super().values()
class SelectableGroups(Deprecated, dict):
    """
    A backward- and forward-compatible result from
    entry_points that fully implements the dict interface.
    """

    @classmethod
    def load(cls, eps):
        """Build an instance mapping each group name to its EntryPoints."""
        group_of = operator.attrgetter('group')
        # groupby only merges adjacent items, hence the sort on the same key.
        buckets = itertools.groupby(sorted(eps, key=group_of), group_of)
        return cls((group, EntryPoints(members)) for group, members in buckets)

    @property
    def _all(self):
        """
        Reconstruct a list of all entrypoints from the groups.
        """
        # Start the lookup after ``Deprecated`` in the MRO so reading the
        # values does not trigger its deprecation warning.
        per_group = super(Deprecated, self).values()
        return EntryPoints(itertools.chain.from_iterable(per_group))

    @property
    def groups(self):
        return self._all.groups

    @property
    def names(self):
        """
        for coverage:
        >>> SelectableGroups().names
        set()
        """
        return self._all.names

    def select(self, **params):
        """Return ``self`` when called without parameters; otherwise the
        flattened entry points filtered by ``params``."""
        if params:
            return self._all.select(**params)
        return self
class PackagePath(pathlib.PurePosixPath):
    """A reference to a path in a package.

    ``locate`` relies on a ``dist`` attribute being assigned to the instance
    after construction.
    """

    def locate(self):
        """Return a path-like object for this path"""
        return self.dist.locate_file(self)

    def read_text(self, encoding='utf-8'):
        """Return the located file's contents decoded with ``encoding``."""
        target = self.locate()
        with target.open(encoding=encoding) as stream:
            return stream.read()

    def read_binary(self):
        """Return the located file's contents as bytes."""
        target = self.locate()
        with target.open('rb') as stream:
            return stream.read()
class FileHash:
    """Hash specification of the form ``<mode>=<value>``.

    ``mode`` is the text before the first ``=`` and ``value`` is everything
    after it; ``value`` is an empty string when ``spec`` has no ``=``.
    """

    def __init__(self, spec):
        self.mode, _, self.value = spec.partition('=')

    def __repr__(self):
        # The original returned an empty string here, which made instances
        # invisible in logs and debuggers; show both parsed fields instead.
        return f'<FileHash mode: {self.mode} value: {self.value}>'
class Distribution:
    """A Python distribution package."""

    @abc.abstractmethod
    def read_text(self, filename):
        """Attempt to load metadata file given by the name.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path):
        """
        Given a path to a file in this distribution, return a path
        to it.
        """

    @classmethod
    def from_name(cls, name):
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        """
        for resolver in cls._discover_resolvers():
            candidates = resolver(DistributionFinder.Context(name=name))
            first = next(iter(candidates), None)
            if first is not None:
                return first
        # No resolver produced a distribution.
        raise PackageNotFoundError(name)

    @classmethod
    def discover(cls, **kwargs):
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for all packages.
        """
        context = kwargs.pop('context', None)
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        if not context:
            context = DistributionFinder.Context(**kwargs)
        per_resolver = (resolver(context) for resolver in cls._discover_resolvers())
        return itertools.chain.from_iterable(per_resolver)

    @staticmethod
    def at(path):
        """Return a Distribution for the indicated metadata path

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        return PathDistribution(pathlib.Path(path))

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers."""
        # Lazily collect each finder's ``find_distributions`` and drop the
        # finders that do not define one.
        return filter(
            None,
            (getattr(finder, 'find_distributions', None) for finder in sys.meta_path),
        )

    @classmethod
    def _local(cls, root='.'):
        """Build the project at ``root`` with pep517 and wrap its metadata."""
        from pep517 import build, meta

        builder = functools.partial(
            meta.build,
            source_dir=root,
            system=build.compat_system(root),
        )
        return PathDistribution(zipp.Path(meta.build_as_zip(builder)))

    @property
    def metadata(self) -> _meta.PackageMetadata:
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata.  See PEP 566 for details.
        """
        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
        )
        parsed = email.message_from_string(text)
        return _adapters.Message(parsed)

    @property
    def name(self):
        """Return the 'Name' metadata for the distribution package."""
        return self.metadata['Name']

    @property
    def _normalized_name(self):
        """Return a normalized version of the name."""
        return Prepared.normalize(self.name)

    @property
    def version(self):
        """Return the 'Version' metadata for the distribution package."""
        return self.metadata['Version']

    @property
    def entry_points(self):
        """Entry points parsed from this distribution's entry_points.txt."""
        text = self.read_text('entry_points.txt')
        return EntryPoints._from_text_for(text, self)

    @property
    def files(self):
        """Files in this distribution.

        :return: List of PackagePath for this distribution or None

        Result is `None` if the metadata file that enumerates files
        (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
        missing.
        Result may be empty if the metadata exists but is empty.
        """

        def make_file(name, hash=None, size_str=None):
            # One CSV row -> one PackagePath annotated with hash/size/dist.
            entry = PackagePath(name)
            entry.hash = FileHash(hash) if hash else None
            entry.size = int(size_str) if size_str else None
            entry.dist = self
            return entry

        @pass_none
        def make_files(lines):
            return list(starmap(make_file, csv.reader(lines)))

        listing = self._read_files_distinfo() or self._read_files_egginfo()
        return make_files(listing)

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo(self):
        """
        SOURCES.txt might contain literal commas, so wrap each line
        in quotes.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self):
        """Generated requirements specified for this Distribution"""
        found = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return found and list(found)

    def _read_dist_info_reqs(self):
        """All ``Requires-Dist`` values from the parsed metadata."""
        return self.metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Requirements derived from requires.txt; falsy when it is absent."""
        source = self.read_text('requires.txt')
        return source and self._deps_from_requires_text(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert requires.txt content into simple requirement strings."""
        sections = Sectioned.read(source)
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.
        """

        def make_condition(name):
            return name and f'extra == "{name}"'

        def quoted_marker(section):
            # A section name looks like "extra:markers"; either side may be
            # empty, and the whole name may be None.
            extra, sep, markers = (section or '').partition(':')
            if extra and markers:
                markers = f'({markers})'
            conditions = [c for c in (markers, make_condition(extra)) if c]
            if not conditions:
                return ''
            return '; ' + ' and '.join(conditions)

        def url_req_space(req):
            """
            PEP 508 requires a space between the url_spec and the quoted_marker.
            Ref python/importlib_metadata#357.
            """
            # '@' is uniquely indicative of a url_req.
            return ' ' * ('@' in req)

        for section in sections:
            requirement = section.value
            yield requirement + url_req_space(requirement) + quoted_marker(section.name)
class DistributionFinder(MetaPathFinder):
    """
    A MetaPathFinder capable of discovering installed distributions.
    """

    class Context:
        """
        Keyword arguments presented by the caller to
        ``distributions()`` or ``Distribution.discover()``
        to narrow the scope of a search for distributions
        in all DistributionFinders.

        Each DistributionFinder may expect any parameters
        and should attempt to honor the canonical
        parameters defined below when appropriate.
        """

        # Specific name for which a distribution finder should match.
        # A name of ``None`` matches all distributions.
        name = None

        def __init__(self, **kwargs):
            # Every keyword becomes an instance attribute.
            vars(self).update(kwargs)

        @property
        def path(self):
            """
            The sequence of directory path that a distribution finder
            should search.

            Typically refers to Python installed package paths such as
            "site-packages" directories and defaults to ``sys.path``.
            """
            # ``path`` is a property, so a caller-supplied value lives only
            # in the instance dict and has to be read from there.
            supplied = vars(self)
            return supplied.get('path', sys.path)

    @abc.abstractmethod
    def find_distributions(self, context=Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """
class FastPath:
    """
    Micro-optimized class for searching a path for
    children.

    >>> FastPath('').children()
    ['...']
    """

    @functools.lru_cache()  # type: ignore
    def __new__(cls, root):
        # Cached on (cls, root): repeated construction with the same root
        # returns the same object.
        return super().__new__(cls)

    def __init__(self, root):
        self.root = str(root)

    def joinpath(self, child):
        return pathlib.Path(self.root, child)

    def children(self):
        """Names directly under ``root``: a directory listing when that
        works, else the top-level names of a zip archive, else ``[]``."""
        try:
            return os.listdir(self.root or '.')
        except Exception:
            pass
        try:
            return self.zip_children()
        except Exception:
            pass
        return []

    def zip_children(self):
        archive = zipp.Path(self.root)
        members = archive.root.namelist()
        # From now on children must be resolved inside the archive.
        self.joinpath = archive.joinpath

        # dict.fromkeys de-duplicates while keeping first-seen order.
        return dict.fromkeys(member.split(posixpath.sep, 1)[0] for member in members)

    def search(self, name):
        return self.lookup(self.mtime).search(name)

    @property
    def mtime(self):
        """Modification time of ``root``; when it cannot be stat'ed, clear
        the lookup cache and return ``None``."""
        try:
            return os.stat(self.root).st_mtime
        except OSError:
            pass
        self.lookup.cache_clear()

    @method_cache
    def lookup(self, mtime):
        # ``mtime`` only serves as the cache key.
        return Lookup(self)
class Lookup:
    """Index of the metadata entries found among a FastPath's children,
    keyed by normalized project name."""

    def __init__(self, path: FastPath):
        base = os.path.basename(path.root).lower()
        base_is_egg = base.endswith(".egg")
        self.infos = FreezableDefaultDict(list)
        self.eggs = FreezableDefaultDict(list)

        for child in path.children():
            lowered = child.lower()
            if lowered.endswith((".dist-info", ".egg-info")):
                # rpartition is faster than splitext and suitable for this purpose.
                stem = lowered.rpartition(".")[0]
                key = Prepared.normalize(stem.partition("-")[0])
                self.infos[key].append(path.joinpath(child))
            elif base_is_egg and lowered == "egg-info":
                # Here the project name comes from the enclosing ``.egg``
                # name rather than from the child.
                stem = base.rpartition(".")[0]
                key = Prepared.legacy_normalize(stem.partition("-")[0])
                self.eggs[key].append(path.joinpath(child))

        # Freeze both maps so later lookups of unknown names add no keys.
        self.infos.freeze()
        self.eggs.freeze()

    def search(self, prepared):
        """Chain the matching info and egg entries; a falsy ``prepared``
        selects every entry."""
        if prepared:
            infos = self.infos[prepared.normalized]
            eggs = self.eggs[prepared.legacy_normalized]
        else:
            infos = itertools.chain.from_iterable(self.infos.values())
            eggs = itertools.chain.from_iterable(self.eggs.values())
        return itertools.chain(infos, eggs)
class Prepared:
    """
    A prepared search for metadata on a possibly-named package.
    """

    # Class-level defaults, left in place when ``name`` is None.
    normalized = None
    legacy_normalized = None

    def __init__(self, name):
        self.name = name
        if name is not None:
            self.normalized = self.normalize(name)
            self.legacy_normalized = self.legacy_normalize(name)

    @staticmethod
    def normalize(name):
        """
        PEP 503 normalization plus dashes as underscores.
        """
        collapsed = re.sub(r"[-_.]+", "-", name)
        return collapsed.lower().replace('-', '_')

    @staticmethod
    def legacy_normalize(name):
        """
        Normalize the package name as found in the convention in
        older packaging tools versions and specs.
        """
        lowered = name.lower()
        return lowered.replace('-', '_')

    def __bool__(self):
        return bool(self.name)
@install
class MetadataPathFinder(NullFinder, DistributionFinder):
    """A degenerate finder for distribution packages on the file system.

    This finder supplies only a find_distributions() method for versions
    of Python that do not have a PathFinder find_distributions().
    """

    def find_distributions(self, context=DistributionFinder.Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching ``context.name``
        (or all names if ``None`` indicated) along the paths in the list
        of directories ``context.path``.
        """
        metadata_dirs = self._search_paths(context.name, context.path)
        return map(PathDistribution, metadata_dirs)

    @classmethod
    def _search_paths(cls, name, paths):
        """Find metadata directories in paths heuristically."""
        query = Prepared(name)
        per_path = (FastPath(entry).search(query) for entry in paths)
        return itertools.chain.from_iterable(per_path)

    def invalidate_caches(cls):
        # Declared without @classmethod, so ``cls`` receives the instance
        # when called on one; it is not used either way.
        FastPath.__new__.cache_clear()
class PathDistribution(Distribution):
    """Distribution whose metadata lives under a path-like object."""

    def __init__(self, path: SimplePath):
        """Construct a distribution.

        :param path: SimplePath indicating the metadata directory.
        """
        self._path = path

    def read_text(self, filename):
        # A missing or unreadable metadata file is reported as ``None``.
        try:
            return self._path.joinpath(filename).read_text(encoding='utf-8')
        except (
            FileNotFoundError,
            IsADirectoryError,
            KeyError,
            NotADirectoryError,
            PermissionError,
        ):
            return None

    # Reuse the base class documentation for the override.
    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path):
        return self._path.parent / path

    @property
    def _normalized_name(self):
        """
        Performance optimization: where possible, resolve the
        normalized name from the file system path.
        """
        stem = os.path.basename(str(self._path))
        from_path = self._name_from_stem(stem)
        return from_path or super()._normalized_name

    def _name_from_stem(self, stem):
        """Return the part of ``stem`` before the first ``-`` when ``stem``
        ends in ``.dist-info`` or ``.egg-info``; otherwise ``None``."""
        _, ext = os.path.splitext(stem)
        if ext not in ('.dist-info', '.egg-info'):
            return
        return stem.partition('-')[0]
def distribution(distribution_name):
    """Look up the ``Distribution`` for a package by name.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    found = Distribution.from_name(distribution_name)
    return found
def distributions(**kwargs):
    """Discover every ``Distribution`` in the current environment.

    Keyword arguments are forwarded unchanged to ``Distribution.discover``.

    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
def metadata(distribution_name) -> _meta.PackageMetadata:
    """Return the parsed metadata of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: A PackageMetadata containing the parsed metadata.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
def version(distribution_name):
    """Return the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
def entry_points(**params) -> Union[EntryPoints, SelectableGroups]:
    """Return EntryPoint objects for all installed packages.

    Pass selection parameters (group or name) to filter the
    result to entry points matching those properties (see
    EntryPoints.select()).

    For compatibility, returns ``SelectableGroups`` object unless
    selection parameters are supplied. In the future, this function
    will return ``EntryPoints`` instead of ``SelectableGroups``
    even when no selection parameters are supplied.

    For maximum future compatibility, pass selection parameters
    or invoke ``.select`` with parameters on the result.

    :return: EntryPoints or SelectableGroups for all installed packages.
    """
    # Keep only the first distribution seen for each normalized name.
    first_per_name = unique_everseen(
        distributions(), key=operator.attrgetter('_normalized_name')
    )
    all_eps = itertools.chain.from_iterable(
        dist.entry_points for dist in first_per_name
    )
    return SelectableGroups.load(all_eps).select(**params)
def files(distribution_name):
    """Return the files belonging to the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: List of files composing the distribution.
    """
    dist = distribution(distribution_name)
    return dist.files
def requires(distribution_name):
    """
    Return the requirements of the named package.

    :return: An iterator of requirements, suitable for
        packaging.requirement.Requirement.
    """
    dist = distribution(distribution_name)
    return dist.requires
def packages_distributions() -> Mapping[str, List[str]]:
    """
    Map each top-level package name to the names of the distributions
    that provide it.

    >>> import collections.abc
    >>> pkgs = packages_distributions()
    >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values())
    True
    """
    providers = collections.defaultdict(list)
    for dist in distributions():
        # Prefer the declared top-level names; infer them from the file
        # list only when none are declared.
        top_level = _top_level_declared(dist) or _top_level_inferred(dist)
        for pkg in top_level:
            providers[pkg].append(dist.metadata['Name'])
    return dict(providers)
def _top_level_declared(dist):
return (dist.read_text('top_level.txt') or '').split()
def _top_level_inferred(dist):
    """Infer top-level names from the ``.py`` files of ``dist``.

    A file nested in a directory contributes its first path component; a
    file at the root contributes its own name without the suffix.
    """
    inferred = set()
    for f in always_iterable(dist.files):
        if f.suffix != ".py":
            continue
        if len(f.parts) > 1:
            inferred.add(f.parts[0])
        else:
            inferred.add(f.with_suffix('').name)
    return inferred
================================================
FILE: metaflow/_vendor/v3_6/importlib_metadata/_adapters.py
================================================
import re
import textwrap
import email.message
from ._text import FoldedCase
class Message(email.message.Message):
    """``email.message.Message`` adapter that repairs header indentation and
    exposes the metadata in a JSON-compatible form."""

    # Keys that may be indicated multiple times per PEP 566.
    multiple_use_keys = {
        FoldedCase(key)
        for key in (
            'Classifier',
            'Obsoletes-Dist',
            'Platform',
            'Project-URL',
            'Provides-Dist',
            'Provides-Extra',
            'Requires-Dist',
            'Requires-External',
            'Supported-Platform',
            'Dynamic',
        )
    }

    def __new__(cls, orig: email.message.Message):
        # Adopt the state of the already-parsed message rather than parsing
        # it again.
        adopted = super().__new__(cls)
        vars(adopted).update(vars(orig))
        return adopted

    def __init__(self, *args, **kwargs):
        # State was copied in __new__; only the headers need fixing up.
        self._headers = self._repair_headers()

    # suppress spurious error from mypy
    def __iter__(self):
        return super().__iter__()

    def _repair_headers(self):
        """Return the header list with multi-line values re-indented and the
        payload, when there is one, appended as ``Description``."""

        def redent(value):
            "Correct for RFC822 indentation"
            if value and '\n' in value:
                return textwrap.dedent(' ' * 8 + value)
            return value

        repaired = []
        for key, value in vars(self)['_headers']:
            repaired.append((key, redent(value)))
        if self._payload:
            repaired.append(('Description', self.get_payload()))
        return repaired

    @property
    def json(self):
        """
        Convert PackageMetadata to a JSON-compatible format
        per PEP 0566.
        """

        def transform(key):
            if key in self.multiple_use_keys:
                value = self.get_all(key)
            else:
                value = self[key]
            if key == 'Keywords':
                value = re.split(r'\s+', value)
            return key.lower().replace('-', '_'), value

        return dict(map(transform, map(FoldedCase, self)))
================================================
FILE: metaflow/_vendor/v3_6/importlib_metadata/_collections.py
================================================
import collections
# from jaraco.collections 3.3
class FreezableDefaultDict(collections.defaultdict):
    """
    A ``defaultdict`` that can be frozen once it has been built: after
    ``freeze()``, looking up a missing key still yields a default value but
    no longer stores it, so the mapping cannot grow during iteration.

    >>> dd = FreezableDefaultDict(list)
    >>> dd[0].append('1')
    >>> dd.freeze()
    >>> dd[1]
    []
    >>> len(dd)
    1
    """

    def __missing__(self, key):
        # Before freeze(): the regular defaultdict behaviour (create and
        # store). After freeze(): the non-storing replacement.
        handler = getattr(self, '_frozen', super().__missing__)
        return handler(key)

    def freeze(self):
        self._frozen = lambda key: self.default_factory()
class Pair(collections.namedtuple('Pair', 'name value')):
    """A ``(name, value)`` named tuple."""

    @classmethod
    def parse(cls, text):
        """Split ``text`` on the first ``=`` and strip whitespace from each
        resulting field."""
        fields = [part.strip() for part in text.split("=", 1)]
        return cls(*fields)
================================================
FILE: metaflow/_vendor/v3_6/importlib_metadata/_compat.py
================================================
import sys
import platform
__all__ = ['install', 'NullFinder', 'Protocol']
try:
from typing import Protocol
except ImportError: # pragma: no cover
from metaflow._vendor.v3_6.typing_extensions import Protocol # type: ignore
def install(cls):
    """
    Class decorator for installation on sys.meta_path.

    Appends an instance of the decorated class to ``sys.meta_path``, then
    attempts to disable the finder functionality of the stdlib
    DistributionFinder. The class itself is returned unchanged.
    """
    finder = cls()
    sys.meta_path.append(finder)
    disable_stdlib_finder()
    return cls
def disable_stdlib_finder():
    """
    Give the backport primacy for discovering path-based distributions
    by monkey-patching the stdlib O_O.

    See #91 for more background for rationale on this sketchy
    behavior.
    """
    for finder in sys.meta_path:  # pragma: nocover
        # Only touch finders that come from the frozen stdlib import
        # machinery and still expose ``find_distributions``.
        from_stdlib = getattr(finder, '__module__', None) == '_frozen_importlib_external'
        if from_stdlib and hasattr(finder, 'find_distributions'):
            del finder.find_distributions
class NullFinder:
    """
    A "Finder" (aka "MetaClassFinder") that never finds any modules,
    but may find distributions.
    """

    @staticmethod
    def find_spec(*args, **kwargs):
        # Never resolves a module, whatever is asked for.
        return None

    # In Python 2, the import system requires finders
    # to have a find_module() method, but this usage
    # is deprecated in Python 3 in favor of find_spec().
    # For the purposes of this finder (i.e. being present
    # on sys.meta_path but having no other import
    # system functionality), the two methods are identical.
    find_module = find_spec
def pypy_partial(val):
    """
    Adjust for variable stacklevel on partial under PyPy.

    Returns ``val`` plus one on PyPy and ``val`` unchanged elsewhere.

    Workaround for #327.
    """
    # The boolean adds as 0 or 1.
    return val + (platform.python_implementation() == 'PyPy')
================================================
FILE: metaflow/_vendor/v3_6/importlib_metadata/_functools.py
================================================
import types
import functools
# from jaraco.functools 3.3
def method_cache(method, cache_wrapper=None):
    """
    Decorate a method so that its results are cached per instance.

    On the first call through an instance, the method is bound, wrapped
    with ``cache_wrapper`` (``functools.lru_cache()`` by default) and stored
    on that instance under the method's name, so later calls go straight to
    the cached version.

    >>> class MyClass:
    ...     calls = 0
    ...
    ...     @method_cache
    ...     def method(self, value):
    ...         self.calls += 1
    ...         return value

    >>> a = MyClass()
    >>> a.method(3)
    3
    >>> for x in range(75):
    ...     res = a.method(x)
    >>> a.calls
    75

    Note that the apparent behavior will be exactly like that of lru_cache
    except that the cache is stored on each instance, so values in one
    instance will not flush values from another, and when an instance is
    deleted, so are the cached values for that instance.

    >>> b = MyClass()
    >>> for x in range(35):
    ...     res = b.method(x)
    >>> b.calls
    35
    >>> a.method(0)
    0
    >>> a.calls
    75

    Note that if method had been decorated with ``functools.lru_cache()``,
    a.calls would have been 76 (due to the cached value of 0 having been
    flushed by the 'b' instance).

    Clear the cache with ``.cache_clear()``

    >>> a.method.cache_clear()

    Same for a method that hasn't yet been called.

    >>> c = MyClass()
    >>> c.method.cache_clear()

    Another cache wrapper may be supplied:

    >>> cache = functools.lru_cache(maxsize=2)
    >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache)
    >>> a = MyClass()
    >>> a.method2()
    3

    Caution - do not subsequently wrap the method with another decorator, such
    as ``@property``, which changes the semantics of the function.

    See also
    http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/
    for another implementation and additional justification.
    """
    if not cache_wrapper:
        cache_wrapper = functools.lru_cache()

    def wrapper(self, *args, **kwargs):
        # First call on this instance: shadow the class-level wrapper with a
        # cached, bound method stored on the instance itself.
        cached = cache_wrapper(types.MethodType(method, self))
        setattr(self, method.__name__, cached)
        return cached(*args, **kwargs)

    # Support cache clear even before cache has been created.
    wrapper.cache_clear = lambda: None

    return wrapper
# From jaraco.functools 3.3
def pass_none(func):
    """
    Return a wrapper around *func* that short-circuits on ``None``.

    When the first argument is ``None`` the wrapper returns ``None``
    without calling *func*; otherwise all arguments are forwarded.

    >>> print_text = pass_none(print)
    >>> print_text('text')
    text
    >>> print_text(None)
    """

    @functools.wraps(func)
    def wrapper(param, *args, **kwargs):
        if param is None:
            return None
        return func(param, *args, **kwargs)

    return wrapper
================================================
FILE: metaflow/_vendor/v3_6/importlib_metadata/_itertools.py
================================================
from itertools import filterfalse
def unique_everseen(iterable, key=None):
    """Yield the elements of *iterable* in order, skipping any already seen.

    With *key*, elements are compared by ``key(element)`` while the
    original elements are yielded.
    """
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    observed = set()
    remember = observed.add
    if key is not None:
        for item in iterable:
            fingerprint = key(item)
            if fingerprint in observed:
                continue
            remember(fingerprint)
            yield item
        return
    for item in iterable:
        if item in observed:
            continue
        remember(item)
        yield item
# copied from more_itertools 8.8
def always_iterable(obj, base_type=(str, bytes)):
    """Return an iterator for *obj*, whatever *obj* happens to be.

    An iterable *obj* is iterated over directly::

        >>> obj = (1, 2, 3)
        >>> list(always_iterable(obj))
        [1, 2, 3]

    A non-iterable *obj* is wrapped so that it becomes the only item::

        >>> obj = 1
        >>> list(always_iterable(obj))
        [1]

    ``None`` produces an empty iterator:

        >>> obj = None
        >>> list(always_iterable(None))
        []

    Text and binary strings are treated as single items by default::

        >>> obj = 'foo'
        >>> list(always_iterable(obj))
        ['foo']

    Any object for which ``isinstance(obj, base_type)`` is true is
    treated as a single item in the same way.

        >>> obj = {'a': 1}
        >>> list(always_iterable(obj))  # Iterate over the dict's keys
        ['a']
        >>> list(always_iterable(obj, base_type=dict))  # Treat dicts as a unit
        [{'a': 1}]

    Pass ``base_type=None`` to switch that special handling off, so that
    everything Python can iterate over is iterated over:

        >>> obj = 'foo'
        >>> list(always_iterable(obj, base_type=None))
        ['f', 'o', 'o']
    """
    if obj is None:
        return iter(())

    treat_as_unit = (base_type is not None) and isinstance(obj, base_type)
    if not treat_as_unit:
        try:
            return iter(obj)
        except TypeError:
            # Not iterable after all; fall through and wrap it.
            pass
    return iter((obj,))
================================================
FILE: metaflow/_vendor/v3_6/importlib_metadata/_meta.py
================================================
from ._compat import Protocol
from typing import Any, Dict, Iterator, List, TypeVar, Union
_T = TypeVar("_T")
class PackageMetadata(Protocol):
    """
    Protocol naming the operations expected of a package metadata
    object: sizing, membership tests, item lookup, iteration, lookup of
    multi-valued keys and a JSON-compatible view.
    """

    def __len__(self) -> int:  # pragma: no cover
        """Support ``len(metadata)``."""

    def __contains__(self, item: str) -> bool:  # pragma: no cover
        """Support ``item in metadata``."""

    def __getitem__(self, key: str) -> str:  # pragma: no cover
        """Support ``metadata[key]``."""

    def __iter__(self) -> Iterator[str]:  # pragma: no cover
        """Support ``iter(metadata)``."""

    def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]:
        """
        Return every value stored under a key that may hold several values.
        """

    @property
    def json(self) -> Dict[str, Union[str, List[str]]]:
        """
        The metadata in a JSON-compatible form.
        """
class SimplePath(Protocol):
    """
    The small slice of the pathlib.Path interface that PathDistribution
    requires.

    NOTE(review): the stubs below declare no operand besides ``self``
    and ``parent`` is declared as a plain method; ``pathlib.Path``
    differs on both counts -- confirm whether the declarations should
    be tightened.
    """

    def joinpath(self) -> 'SimplePath':  # pragma: no cover
        """Stub for ``joinpath``; annotated as returning a SimplePath."""

    def __truediv__(self) -> 'SimplePath':  # pragma: no cover
        """Stub for the ``/`` operator; annotated as returning a SimplePath."""

    def parent(self) -> 'SimplePath':  # pragma: no cover
        """Stub for ``parent``; annotated as returning a SimplePath."""

    def read_text(self) -> str:  # pragma: no cover
        """Stub for ``read_text``; annotated as returning a str."""
================================================
FILE: metaflow/_vendor/v3_6/importlib_metadata/_text.py
================================================
import re
from ._functools import method_cache
# from jaraco.text 3.5
class FoldedCase(str):
    """
    A case insensitive string class; behaves just like str
    except compares equal when the only variation is case.

    >>> s = FoldedCase('hello world')

    >>> s == 'Hello World'
    True

    >>> 'Hello World' == s
    True

    >>> s != 'Hello World'
    False

    >>> s.index('O')
    4

    >>> s.split('O')
    ['hell', ' w', 'rld']

    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
    ['alpha', 'Beta', 'GAMMA']

    Sequence membership is straightforward.

    >>> "Hello World" in [s]
    True
    >>> s in ["Hello World"]
    True

    You may test for set inclusion, but candidate and elements
    must both be folded.

    >>> FoldedCase("Hello World") in {s}
    True
    >>> s in {FoldedCase("Hello World")}
    True

    String inclusion works as long as the FoldedCase object
    is on the right.

    >>> "hello" in FoldedCase("Hello World")
    True

    But not if the FoldedCase object is on the left:

    >>> FoldedCase('hello') in 'Hello World'
    False

    In that case, use in_:

    >>> FoldedCase('hello').in_('Hello World')
    True

    >>> FoldedCase('hello') > FoldedCase('Hello')
    False

    Comparing with something that is not a string is not an error; the
    operands are simply unequal.

    >>> FoldedCase('hello') == 1
    False
    >>> FoldedCase('hello') != None
    True
    """

    # The rich comparisons return NotImplemented for non-string operands
    # so that Python applies its normal fallback (identity for == / !=,
    # TypeError for ordering) instead of failing with AttributeError on
    # ``other.lower()``.

    def __lt__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() < other.lower()

    def __gt__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() > other.lower()

    def __eq__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() == other.lower()

    def __ne__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() != other.lower()

    def __hash__(self):
        # Hash the folded form so values that compare equal hash alike.
        return hash(self.lower())

    def __contains__(self, other):
        return super().lower().__contains__(other.lower())

    def in_(self, other):
        "Does self appear in other?"
        return self in FoldedCase(other)

    # cache lower since it's likely to be called frequently.
    @method_cache
    def lower(self):
        return super().lower()

    def index(self, sub):
        """Return the first position of *sub*, ignoring case."""
        return self.lower().index(sub.lower())

    def split(self, splitter=' ', maxsplit=0):
        """Split on *splitter*, ignoring case; 0 means no split limit."""
        pattern = re.compile(re.escape(splitter), re.I)
        return pattern.split(self, maxsplit)
================================================
FILE: metaflow/_vendor/v3_6/importlib_metadata/py.typed
================================================
================================================
FILE: metaflow/_vendor/v3_6/importlib_metadata.LICENSE
================================================
Copyright 2017-2019 Jason R. Coombs, Barry Warsaw
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: metaflow/_vendor/v3_6/typing_extensions.LICENSE
================================================
A. HISTORY OF THE SOFTWARE
==========================
Python was created in the early 1990s by Guido van Rossum at Stichting
Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands
as a successor of a language called ABC. Guido remains Python's
principal author, although it includes many contributions from others.
In 1995, Guido continued his work on Python at the Corporation for
National Research Initiatives (CNRI, see http://www.cnri.reston.va.us)
in Reston, Virginia where he released several versions of the
software.
In May 2000, Guido and the Python core development team moved to
BeOpen.com to form the BeOpen PythonLabs team. In October of the same
year, the PythonLabs team moved to Digital Creations (now Zope
Corporation, see http://www.zope.com). In 2001, the Python Software
Foundation (PSF, see http://www.python.org/psf/) was formed, a
non-profit organization created specifically to own Python-related
Intellectual Property. Zope Corporation is a sponsoring member of
the PSF.
All Python releases are Open Source (see http://www.opensource.org for
the Open Source Definition). Historically, most, but not all, Python
releases have also been GPL-compatible; the table below summarizes
the various releases.
Release Derived Year Owner GPL-
from compatible? (1)
0.9.0 thru 1.2 1991-1995 CWI yes
1.3 thru 1.5.2 1.2 1995-1999 CNRI yes
1.6 1.5.2 2000 CNRI no
2.0 1.6 2000 BeOpen.com no
1.6.1 1.6 2001 CNRI yes (2)
2.1 2.0+1.6.1 2001 PSF no
2.0.1 2.0+1.6.1 2001 PSF yes
2.1.1 2.1+2.0.1 2001 PSF yes
2.1.2 2.1.1 2002 PSF yes
2.1.3 2.1.2 2002 PSF yes
2.2 and above 2.1.1 2001-now PSF yes
Footnotes:
(1) GPL-compatible doesn't mean that we're distributing Python under
the GPL. All Python licenses, unlike the GPL, let you distribute
a modified version without making your changes open source. The
GPL-compatible licenses make it possible to combine Python with
other software that is released under the GPL; the others don't.
(2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
because its license has a choice of law clause. According to
CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
is "not incompatible" with the GPL.
Thanks to the many outside volunteers who have worked under Guido's
direction to make these releases possible.
B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
===============================================================
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
--------------------------------------------
1. This LICENSE AGREEMENT is between the Python Software Foundation
("PSF"), and the Individual or Organization ("Licensee") accessing and
otherwise using this software ("Python") in source or binary form and
its associated documentation.
2. Subject to the terms and conditions of this License Agreement, PSF hereby
grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
analyze, test, perform and/or display publicly, prepare derivative works,
distribute, and otherwise use Python alone or in any derivative version,
provided, however, that PSF's License Agreement and PSF's notice of copyright,
i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014 Python Software Foundation; All Rights Reserved" are
retained in Python alone or in any derivative version prepared by Licensee.
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python.
4. PSF is making Python available to Licensee on an "AS IS"
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between PSF and
Licensee. This License Agreement does not grant permission to use PSF
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
-------------------------------------------
BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
Individual or Organization ("Licensee") accessing and otherwise using
this software in source or binary form and its associated
documentation ("the Software").
2. Subject to the terms and conditions of this BeOpen Python License
Agreement, BeOpen hereby grants Licensee a non-exclusive,
royalty-free, world-wide license to reproduce, analyze, test, perform
and/or display publicly, prepare derivative works, distribute, and
otherwise use the Software alone or in any derivative version,
provided, however, that the BeOpen Python License is retained in the
Software, alone or in any derivative version prepared by Licensee.
3. BeOpen is making the Software available to Licensee on an "AS IS"
basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
5. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
6. This License Agreement shall be governed by and interpreted in all
respects by the law of the State of California, excluding conflict of
law provisions. Nothing in this License Agreement shall be deemed to
create any relationship of agency, partnership, or joint venture
between BeOpen and Licensee. This License Agreement does not grant
permission to use BeOpen trademarks or trade names in a trademark
sense to endorse or promote products or services of Licensee, or any
third party. As an exception, the "BeOpen Python" logos available at
http://www.pythonlabs.com/logos.html may be used according to the
permissions granted on that web page.
7. By copying, installing or otherwise using the software, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
---------------------------------------
1. This LICENSE AGREEMENT is between the Corporation for National
Research Initiatives, having an office at 1895 Preston White Drive,
Reston, VA 20191 ("CNRI"), and the Individual or Organization
("Licensee") accessing and otherwise using Python 1.6.1 software in
source or binary form and its associated documentation.
2. Subject to the terms and conditions of this License Agreement, CNRI
hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use Python 1.6.1
alone or in any derivative version, provided, however, that CNRI's
License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
1995-2001 Corporation for National Research Initiatives; All Rights
Reserved" are retained in Python 1.6.1 alone or in any derivative
version prepared by Licensee. Alternately, in lieu of CNRI's License
Agreement, Licensee may substitute the following text (omitting the
quotes): "Python 1.6.1 is made available subject to the terms and
conditions in CNRI's License Agreement. This Agreement together with
Python 1.6.1 may be located on the Internet using the following
unique, persistent identifier (known as a handle): 1895.22/1013. This
Agreement may also be obtained from a proxy server on the Internet
using the following URL: http://hdl.handle.net/1895.22/1013".
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python 1.6.1 or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python 1.6.1.
4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. This License Agreement shall be governed by the federal
intellectual property law of the United States, including without
limitation the federal copyright law, and, to the extent such
U.S. federal law does not apply, by the law of the Commonwealth of
Virginia, excluding Virginia's conflict of law provisions.
Notwithstanding the foregoing, with regard to derivative works based
on Python 1.6.1 that incorporate non-separable material that was
previously distributed under the GNU General Public License (GPL), the
law of the Commonwealth of Virginia shall govern this License
Agreement only as to issues arising under or with respect to
Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
License Agreement shall be deemed to create any relationship of
agency, partnership, or joint venture between CNRI and Licensee. This
License Agreement does not grant permission to use CNRI trademarks or
trade name in a trademark sense to endorse or promote products or
services of Licensee, or any third party.
8. By clicking on the "ACCEPT" button where indicated, or by copying,
installing or otherwise using Python 1.6.1, Licensee agrees to be
bound by the terms and conditions of this License Agreement.
ACCEPT
CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
--------------------------------------------------
Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
The Netherlands. All rights reserved.
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.
STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
================================================
FILE: metaflow/_vendor/v3_6/typing_extensions.py
================================================
import abc
import collections
import collections.abc
import operator
import sys
import types as _types
import typing
# After PEP 560, internal typing API was substantially reworked.
# This is especially important for Protocol class which uses internal APIs
# quite extensively.
PEP_560 = sys.version_info[:3] >= (3, 7, 0)
if PEP_560:
GenericMeta = type
else:
# 3.6
from typing import GenericMeta, _type_vars # noqa
# Please keep __all__ alphabetized within each category.
__all__ = [
# Super-special typing primitives.
'ClassVar',
'Concatenate',
'Final',
'LiteralString',
'ParamSpec',
'Self',
'Type',
'TypeVarTuple',
'Unpack',
# ABCs (from collections.abc).
'Awaitable',
'AsyncIterator',
'AsyncIterable',
'Coroutine',
'AsyncGenerator',
'AsyncContextManager',
'ChainMap',
# Concrete collection types.
'ContextManager',
'Counter',
'Deque',
'DefaultDict',
'OrderedDict',
'TypedDict',
# Structural checks, a.k.a. protocols.
'SupportsIndex',
# One-off things.
'Annotated',
'assert_never',
'dataclass_transform',
'final',
'IntVar',
'is_typeddict',
'Literal',
'NewType',
'overload',
'Protocol',
'reveal_type',
'runtime',
'runtime_checkable',
'Text',
'TypeAlias',
'TypeGuard',
'TYPE_CHECKING',
'Never',
'NoReturn',
'Required',
'NotRequired',
]
if PEP_560:
__all__.extend(["get_args", "get_origin", "get_type_hints"])
# The functions below are modified copies of typing internal helpers.
# They are needed by _ProtocolMeta and they provide support for PEP 646.
def _no_slots_copy(dct):
dict_copy = dict(dct)
if '__slots__' in dict_copy:
for slot in dict_copy['__slots__']:
dict_copy.pop(slot, None)
return dict_copy
_marker = object()
def _check_generic(cls, parameters, elen=_marker):
"""Check correct count for parameters of a generic cls (internal helper).
This gives a nice error message in case of count mismatch.
"""
if not elen:
raise TypeError(f"{cls} is not a generic class")
if elen is _marker:
if not hasattr(cls, "__parameters__") or not cls.__parameters__:
raise TypeError(f"{cls} is not a generic class")
elen = len(cls.__parameters__)
alen = len(parameters)
if alen != elen:
if hasattr(cls, "__parameters__"):
parameters = [p for p in cls.__parameters__ if not _is_unpack(p)]
num_tv_tuples = sum(isinstance(p, TypeVarTuple) for p in parameters)
if (num_tv_tuples > 0) and (alen >= elen - num_tv_tuples):
return
raise TypeError(f"Too {'many' if alen > elen else 'few'} parameters for {cls};"
f" actual {alen}, expected {elen}")
if sys.version_info >= (3, 10):
def _should_collect_from_parameters(t):
return isinstance(
t, (typing._GenericAlias, _types.GenericAlias, _types.UnionType)
)
elif sys.version_info >= (3, 9):
def _should_collect_from_parameters(t):
return isinstance(t, (typing._GenericAlias, _types.GenericAlias))
else:
def _should_collect_from_parameters(t):
return isinstance(t, typing._GenericAlias) and not t._special
def _collect_type_vars(types, typevar_types=None):
    """Gather the type variables found in *types*, ordered by first
    appearance, and return them as a tuple.

    Items that are instances of *typevar_types* (``typing.TypeVar`` by
    default) are collected directly unless ``_is_unpack`` flags them;
    for generic aliases the variables listed in ``__parameters__`` are
    collected as well.
    """
    if typevar_types is None:
        typevar_types = typing.TypeVar

    collected = []
    for candidate in types:
        is_new_typevar = (
            isinstance(candidate, typevar_types)
            and candidate not in collected
            and not _is_unpack(candidate)
        )
        if is_new_typevar:
            collected.append(candidate)
        if _should_collect_from_parameters(candidate):
            collected.extend(
                [p for p in candidate.__parameters__ if p not in collected]
            )
    return tuple(collected)
if not hasattr(typing, 'NoReturn'):
    # 3.6.0-3.6.1: typing has no NoReturn yet, so provide a stand-in.
    class _NoReturn(typing._FinalTypingBase, _root=True):
        """Special type marking functions that never return.

        Example::

            from typing import NoReturn

            def stop() -> NoReturn:
                raise Exception('no way')

        Using this type anywhere else, e.g. ``List[NoReturn]``, is
        rejected by static type checkers.
        """
        __slots__ = ()

        def __instancecheck__(self, obj):
            raise TypeError("NoReturn cannot be used with isinstance().")

        def __subclasscheck__(self, cls):
            raise TypeError("NoReturn cannot be used with issubclass().")

    NoReturn = _NoReturn(_root=True)
else:
    # 3.6.2+
    NoReturn = typing.NoReturn
# Unconstrained type variables used by the generic container classes
# defined further down in this module.
# (These are not for export.)
T = typing.TypeVar('T')  # Any type.
KT = typing.TypeVar('KT')  # Key type.
VT = typing.TypeVar('VT')  # Value type.
T_co = typing.TypeVar('T_co', covariant=True)  # Any type; for covariant containers.
T_contra = typing.TypeVar('T_contra', contravariant=True)  # Ditto, contravariant.
# Re-exported unchanged from typing.
ClassVar = typing.ClassVar
# On older versions of typing there is an internal class named "Final",
# hence the explicit version checks next to the hasattr() probe.
if sys.version_info[:2] < (3, 7):
    # 3.6
    class _Final(typing._FinalTypingBase, _root=True):
        """A special typing construct to indicate that a name
        cannot be re-assigned or overridden in a subclass.
        For example:

            MAX_SIZE: Final = 9000
            MAX_SIZE += 1  # Error reported by type checker

            class Connection:
                TIMEOUT: Final[int] = 10
            class FastConnector(Connection):
                TIMEOUT = 1  # Error reported by type checker

        There is no runtime checking of these properties.
        """

        __slots__ = ('__type__',)

        def __init__(self, tp=None, **kwds):
            self.__type__ = tp

        def __getitem__(self, item):
            cls = type(self)
            # An already-parameterized Final cannot be subscripted again.
            if self.__type__ is not None:
                raise TypeError(f'{cls.__name__[1:]} cannot be further subscripted')
            checked = typing._type_check(
                item, f'{cls.__name__[1:]} accepts only single type.')
            return cls(checked, _root=True)

        def _eval_type(self, globalns, localns):
            evaluated = typing._eval_type(self.__type__, globalns, localns)
            if evaluated != self.__type__:
                return type(self)(evaluated, _root=True)
            return self

        def __repr__(self):
            text = super().__repr__()
            if self.__type__ is None:
                return text
            return text + f'[{typing._type_repr(self.__type__)}]'

        def __hash__(self):
            return hash((type(self).__name__, self.__type__))

        def __eq__(self, other):
            if not isinstance(other, _Final):
                return NotImplemented
            if self.__type__ is None:
                return self is other
            return self.__type__ == other.__type__

    Final = _Final(_root=True)
elif hasattr(typing, 'Final'):
    # 3.8+
    Final = typing.Final
else:
    # 3.7
    class _FinalForm(typing._SpecialForm, _root=True):

        def __repr__(self):
            return 'typing_extensions.' + self._name

        def __getitem__(self, parameters):
            checked = typing._type_check(
                parameters, f'{self._name} accepts only single type')
            return typing._GenericAlias(self, (checked,))

    Final = _FinalForm('Final',
                       doc="""A special typing construct to indicate that a name
                       cannot be re-assigned or overridden in a subclass.
                       For example:

                           MAX_SIZE: Final = 9000
                           MAX_SIZE += 1  # Error reported by type checker

                           class Connection:
                               TIMEOUT: Final[int] = 10
                           class FastConnector(Connection):
                               TIMEOUT = 1  # Error reported by type checker

                       There is no runtime checking of these properties.""")
if sys.version_info < (3, 11):
    # @final exists in 3.8+, but we backport it for all versions
    # before 3.11 to keep support for the __final__ attribute.
    # See https://bugs.python.org/issue46342
    def final(f):
        """Mark a method as non-overridable, or a class as
        non-subclassable, for the benefit of type checkers. For example:

            class Base:
                @final
                def done(self) -> None:
                    ...
            class Sub(Base):
                def done(self) -> None:  # Error reported by type checker
                    ...
            @final
            class Leaf:
                ...
            class Other(Leaf):  # Error reported by type checker
                ...

        Nothing is enforced at runtime. The decorator only tries to set
        ``__final__`` to ``True`` on the decorated object so that the
        marker can be introspected, and returns the object unchanged.
        """
        try:
            setattr(f, '__final__', True)
        except (AttributeError, TypeError):
            # The attribute is not writable; skip it silently.
            # AttributeError: the object has __slots__ or a read-only
            # property.  TypeError: it is a builtin class.
            pass
        return f
else:
    final = typing.final
def IntVar(name):
    """Return an ordinary, unconstrained ``typing.TypeVar`` called *name*."""
    type_var = typing.TypeVar(name)
    return type_var
if hasattr(typing, 'Literal'):
    # 3.8+:
    Literal = typing.Literal
elif sys.version_info[:2] < (3, 7):
    # 3.6:
    class _Literal(typing._FinalTypingBase, _root=True):
        """A type that can be used to indicate to type checkers that the
        corresponding value has a value literally equivalent to the
        provided parameter. For example:

            var: Literal[4] = 4

        The type checker understands that 'var' is literally equal to the
        value 4 and no other value.

        Literal[...] cannot be subclassed. There is no runtime checking
        verifying that the parameter is actually a value instead of a type.
        """

        __slots__ = ('__values__',)

        def __init__(self, values=None, **kwds):
            self.__values__ = values

        def __getitem__(self, values):
            cls = type(self)
            # An already-parameterized Literal cannot be subscripted again.
            if self.__values__ is not None:
                raise TypeError(f'{cls.__name__[1:]} cannot be further subscripted')
            if not isinstance(values, tuple):
                values = (values,)
            return cls(values, _root=True)

        def _eval_type(self, globalns, localns):
            return self

        def __repr__(self):
            text = super().__repr__()
            if self.__values__ is None:
                return text
            joined = ", ".join(map(typing._type_repr, self.__values__))
            return text + f'[{joined}]'

        def __hash__(self):
            return hash((type(self).__name__, self.__values__))

        def __eq__(self, other):
            if not isinstance(other, _Literal):
                return NotImplemented
            if self.__values__ is None:
                return self is other
            return self.__values__ == other.__values__

    Literal = _Literal(_root=True)
else:
    # 3.7:
    class _LiteralForm(typing._SpecialForm, _root=True):

        def __repr__(self):
            return 'typing_extensions.' + self._name

        def __getitem__(self, parameters):
            return typing._GenericAlias(self, parameters)

    Literal = _LiteralForm('Literal',
                           doc="""A type that can be used to indicate to type checkers
                           that the corresponding value has a value literally equivalent
                           to the provided parameter. For example:

                               var: Literal[4] = 4

                           The type checker understands that 'var' is literally equal to
                           the value 4 and no other value.

                           Literal[...] cannot be subclassed. There is no runtime
                           checking verifying that the parameter is actually a value
                           instead of a type.""")
# Re-exports from typing.  _overload_dummy is a private typing name;
# presumably it is bound here so it stays importable from this module
# -- TODO confirm.
_overload_dummy = typing._overload_dummy  # noqa
overload = typing.overload
# This is not a real generic class. Don't use outside annotations.
Type = typing.Type
# Various ABCs mimicking those in collections.abc.
# A few are simply re-exported for completeness.
class _ExtensionsGenericMeta(GenericMeta):
    # Metaclass passed as ``metaclass=`` to the generic container classes
    # defined in this module for interpreters whose typing lacks them.

    def __subclasscheck__(self, subclass):
        """This mimics a more modern GenericMeta.__subclasscheck__() logic
        (that does not have problems with recursion) to work around interactions
        between collections, typing, and typing_extensions on older
        versions of Python, see https://github.com/python/typing/issues/501.
        """
        if self.__origin__ is not None:
            # Parameterized generics only tolerate checks issued from the
            # 'abc' and 'functools' modules; the immediate caller's module
            # name decides, so the frame lookup must stay in this method.
            caller_module = sys._getframe(1).f_globals['__name__']
            if caller_module not in ['abc', 'functools']:
                raise TypeError("Parameterized generics cannot be used with class "
                                "or instance checks")
            return False

        extra = self.__extra__
        if not extra:
            return super().__subclasscheck__(subclass)

        hook_result = extra.__subclasshook__(subclass)
        if hook_result is not NotImplemented:
            return hook_result
        if extra in subclass.__mro__:
            return True
        for scls in extra.__subclasses__():
            if not isinstance(scls, GenericMeta) and issubclass(subclass, scls):
                return True
        return False
# Asynchronous ABC aliases, re-exported unchanged from typing.
Awaitable = typing.Awaitable
Coroutine = typing.Coroutine
AsyncIterable = typing.AsyncIterable
AsyncIterator = typing.AsyncIterator
if not hasattr(typing, 'Deque'):
    # 3.6.0: typing has no Deque, so build a generic stand-in.
    class Deque(collections.deque, typing.MutableSequence[T],
                metaclass=_ExtensionsGenericMeta,
                extra=collections.deque):
        __slots__ = ()

        def __new__(cls, *args, **kwds):
            # Instantiating the bare generic yields a plain deque.
            if cls._gorg is not Deque:
                return typing._generic_new(collections.deque, cls, *args, **kwds)
            return collections.deque(*args, **kwds)
else:
    # 3.6.1+
    Deque = typing.Deque
ContextManager = typing.ContextManager

if not hasattr(typing, 'AsyncContextManager'):
    # 3.6.0-3.6.1: typing has no AsyncContextManager, so define one.
    from _collections_abc import _check_methods as _check_methods_in_mro  # noqa

    class AsyncContextManager(typing.Generic[T_co]):
        __slots__ = ()

        async def __aenter__(self):
            return self

        @abc.abstractmethod
        async def __aexit__(self, exc_type, exc_value, traceback):
            return None

        @classmethod
        def __subclasshook__(cls, C):
            # Only the base class itself performs the structural check.
            if cls is not AsyncContextManager:
                return NotImplemented
            return _check_methods_in_mro(C, "__aenter__", "__aexit__")
else:
    # 3.6.2+
    AsyncContextManager = typing.AsyncContextManager
DefaultDict = typing.DefaultDict

if hasattr(typing, 'OrderedDict'):
    # 3.7.2+
    OrderedDict = typing.OrderedDict
elif (3, 7) <= sys.version_info < (3, 7, 2):
    # 3.7.0-3.7.1
    OrderedDict = typing._alias(collections.OrderedDict, (KT, VT))
else:
    # 3.6
    class OrderedDict(collections.OrderedDict, typing.MutableMapping[KT, VT],
                      metaclass=_ExtensionsGenericMeta,
                      extra=collections.OrderedDict):
        __slots__ = ()

        def __new__(cls, *args, **kwds):
            # Instantiating the bare generic yields a plain OrderedDict.
            if cls._gorg is not OrderedDict:
                return typing._generic_new(collections.OrderedDict, cls, *args, **kwds)
            return collections.OrderedDict(*args, **kwds)
if not hasattr(typing, 'Counter'):
    # 3.6.0-3.6.1: typing has no Counter, so build a generic stand-in.
    class Counter(collections.Counter,
                  typing.Dict[T, int],
                  metaclass=_ExtensionsGenericMeta, extra=collections.Counter):
        __slots__ = ()

        def __new__(cls, *args, **kwds):
            # Instantiating the bare generic yields a plain Counter.
            if cls._gorg is not Counter:
                return typing._generic_new(collections.Counter, cls, *args, **kwds)
            return collections.Counter(*args, **kwds)
else:
    # 3.6.2+
    Counter = typing.Counter
if not hasattr(typing, 'ChainMap'):
    # typing has no ChainMap: build a generic stand-in, provided that
    # collections offers the runtime class.
    if hasattr(collections, 'ChainMap'):
        class ChainMap(collections.ChainMap, typing.MutableMapping[KT, VT],
                       metaclass=_ExtensionsGenericMeta,
                       extra=collections.ChainMap):
            __slots__ = ()

            def __new__(cls, *args, **kwds):
                # Instantiating the bare generic yields a plain ChainMap.
                if cls._gorg is not ChainMap:
                    return typing._generic_new(collections.ChainMap, cls, *args, **kwds)
                return collections.ChainMap(*args, **kwds)
else:
    # 3.6.1+
    ChainMap = typing.ChainMap
if not hasattr(typing, 'AsyncGenerator'):
    # 3.6.0
    class AsyncGenerator(AsyncIterator[T_co], typing.Generic[T_co, T_contra],
                         metaclass=_ExtensionsGenericMeta,
                         extra=collections.abc.AsyncGenerator):
        __slots__ = ()
else:
    # 3.6.1+
    AsyncGenerator = typing.AsyncGenerator

# Straight re-exports from typing.
NewType = typing.NewType
Text = typing.Text
TYPE_CHECKING = typing.TYPE_CHECKING
def _gorg(cls):
    """Return the original (unsubscripted) generic class behind ``cls``.

    Compatibility shim for old ``typing`` versions: uses the ``_gorg``
    attribute when the class has one, otherwise follows ``__origin__``
    links until a class whose ``__origin__`` is None is reached.
    """
    assert isinstance(cls, GenericMeta)
    if not hasattr(cls, '_gorg'):
        current = cls
        while current.__origin__ is not None:
            current = current.__origin__
        return current
    return cls._gorg
_PROTO_WHITELIST = ['Callable', 'Awaitable',
'Iterable', 'Iterator', 'AsyncIterable', 'AsyncIterator',
'Hashable', 'Sized', 'Container', 'Collection', 'Reversible',
'ContextManager', 'AsyncContextManager']
def _get_protocol_attrs(cls):
attrs = set()
for base in cls.__mro__[:-1]: # without object
if base.__name__ in ('Protocol', 'Generic'):
continue
annotations = getattr(base, '__annotations__', {})
for attr in list(base.__dict__.keys()) + list(annotations.keys()):
if (not attr.startswith('_abc_') and attr not in (
'__abstractmethods__', '__annotations__', '__weakref__',
'_is_protocol', '_is_runtime_protocol', '__dict__',
'__args__', '__slots__',
'__next_in_mro__', '__parameters__', '__origin__',
'__orig_bases__', '__extra__', '__tree_hash__',
'__doc__', '__subclasshook__', '__init__', '__new__',
'__module__', '_MutableMapping__marker', '_gorg')):
attrs.add(attr)
return attrs
def _is_callable_members_only(cls):
    """Return True when every protocol member of ``cls`` is callable.

    A member that cannot be found on ``cls`` is looked up as None and
    therefore counts as non-callable.
    """
    for member in _get_protocol_attrs(cls):
        if not callable(getattr(cls, member, None)):
            return False
    return True
# Protocol is taken from typing when available (3.8+); otherwise one of two
# backports is defined: a PEP 560 based one (3.7) or a GenericMeta based one (3.6).
# 3.8+
if hasattr(typing, 'Protocol'):
    Protocol = typing.Protocol
# 3.7
elif PEP_560:

    # Installed as ``__init__`` on protocol classes so that protocols themselves
    # cannot be instantiated; non-protocol subclasses pass through silently.
    def _no_init(self, *args, **kwargs):
        if type(self)._is_protocol:
            raise TypeError('Protocols cannot be instantiated')

    class _ProtocolMeta(abc.ABCMeta):
        # This metaclass is a bit unfortunate and exists only because of the lack
        # of __instancehook__.
        def __instancecheck__(cls, instance):
            # We need this method for situations where attributes are
            # assigned in __init__.
            # Fast path: non-protocols and method-only protocols can rely on
            # the ordinary issubclass() check of the instance's class.
            if ((not getattr(cls, '_is_protocol', False) or
                 _is_callable_members_only(cls)) and
                    issubclass(instance.__class__, cls)):
                return True
            if cls._is_protocol:
                # Structural check on the instance itself: every protocol member
                # must be present, and members that are callable on the protocol
                # must not be None on the instance.
                if all(hasattr(instance, attr) and
                       (not callable(getattr(cls, attr, None)) or
                        getattr(instance, attr) is not None)
                       for attr in _get_protocol_attrs(cls)):
                    return True
            return super().__instancecheck__(instance)

    class Protocol(metaclass=_ProtocolMeta):
        # There is quite a lot of overlapping code with typing.Generic.
        # Unfortunately it is hard to avoid this while these live in two different
        # modules. The duplicated code will be removed when Protocol is moved to typing.
        """Base class for protocol classes. Protocol classes are defined as::

            class Proto(Protocol):
                def meth(self) -> int:
                    ...

        Such classes are primarily used with static type checkers that recognize
        structural subtyping (static duck-typing), for example::

            class C:
                def meth(self) -> int:
                    return 0

            def func(x: Proto) -> int:
                return x.meth()

            func(C())  # Passes static type check

        See PEP 544 for details. Protocol classes decorated with
        @typing_extensions.runtime act as simple-minded runtime protocol that checks
        only the presence of given attributes, ignoring their type signatures.

        Protocol classes can be generic, they are defined as::

            class GenProto(Protocol[T]):
                def meth(self) -> T:
                    ...
        """
        __slots__ = ()
        _is_protocol = True

        def __new__(cls, *args, **kwds):
            # Only the bare Protocol class is rejected here; arguments are not
            # forwarded to the parent __new__.
            if cls is Protocol:
                raise TypeError("Type Protocol cannot be instantiated; "
                                "it can only be used as a base class")
            return super().__new__(cls)

        @typing._tp_cache
        def __class_getitem__(cls, params):
            # Normalise a single parameter to a one-element tuple.
            if not isinstance(params, tuple):
                params = (params,)
            if not params and cls is not typing.Tuple:
                raise TypeError(
                    f"Parameter list to {cls.__qualname__}[...] cannot be empty")
            msg = "Parameters to generic types must be types."
            params = tuple(typing._type_check(p, msg) for p in params)  # noqa
            if cls is Protocol:
                # Generic can only be subscripted with unique type variables.
                if not all(isinstance(p, typing.TypeVar) for p in params):
                    # Locate the first non-TypeVar parameter for the error message.
                    i = 0
                    while isinstance(params[i], typing.TypeVar):
                        i += 1
                    raise TypeError(
                        "Parameters to Protocol[...] must all be type variables."
                        f" Parameter {i + 1} is {params[i]}")
                if len(set(params)) != len(params):
                    raise TypeError(
                        "Parameters to Protocol[...] must all be unique")
            else:
                # Subscripting a regular Generic subclass.
                _check_generic(cls, params, len(cls.__parameters__))
            return typing._GenericAlias(cls, params)

        def __init_subclass__(cls, *args, **kwargs):
            # Step 1: work out the subclass's type parameters.
            tvars = []
            if '__orig_bases__' in cls.__dict__:
                error = typing.Generic in cls.__orig_bases__
            else:
                error = typing.Generic in cls.__bases__
            if error:
                raise TypeError("Cannot inherit from plain Generic")
            if '__orig_bases__' in cls.__dict__:
                tvars = typing._collect_type_vars(cls.__orig_bases__)
                # Look for Generic[T1, ..., Tn] or Protocol[T1, ..., Tn].
                # If found, tvars must be a subset of it.
                # If not found, tvars is it.
                # Also check for and reject plain Generic,
                # and reject multiple Generic[...] and/or Protocol[...].
                gvars = None
                for base in cls.__orig_bases__:
                    if (isinstance(base, typing._GenericAlias) and
                            base.__origin__ in (typing.Generic, Protocol)):
                        # for error messages
                        the_base = base.__origin__.__name__
                        if gvars is not None:
                            raise TypeError(
                                "Cannot inherit from Generic[...]"
                                " and/or Protocol[...] multiple types.")
                        gvars = base.__parameters__
                if gvars is None:
                    gvars = tvars
                else:
                    tvarset = set(tvars)
                    gvarset = set(gvars)
                    if not tvarset <= gvarset:
                        s_vars = ', '.join(str(t) for t in tvars if t not in gvarset)
                        s_args = ', '.join(str(g) for g in gvars)
                        raise TypeError(f"Some type variables ({s_vars}) are"
                                        f" not listed in {the_base}[{s_args}]")
                    tvars = gvars
            cls.__parameters__ = tuple(tvars)

            # Determine if this is a protocol or a concrete subclass.
            if not cls.__dict__.get('_is_protocol', None):
                cls._is_protocol = any(b is Protocol for b in cls.__bases__)

            # Set (or override) the protocol subclass hook.
            def _proto_hook(other):
                # Only classes that are themselves protocols take part.
                if not cls.__dict__.get('_is_protocol', None):
                    return NotImplemented
                if not getattr(cls, '_is_runtime_protocol', False):
                    # Checks issued from the abc/functools modules are declined
                    # quietly instead of raising.
                    if sys._getframe(2).f_globals['__name__'] in ['abc', 'functools']:
                        return NotImplemented
                    raise TypeError("Instance and class checks can only be used with"
                                    " @runtime protocols")
                if not _is_callable_members_only(cls):
                    if sys._getframe(2).f_globals['__name__'] in ['abc', 'functools']:
                        return NotImplemented
                    raise TypeError("Protocols with non-method members"
                                    " don't support issubclass()")
                if not isinstance(other, type):
                    # Same error as for issubclass(1, int)
                    raise TypeError('issubclass() arg 1 must be a class')
                for attr in _get_protocol_attrs(cls):
                    for base in other.__mro__:
                        if attr in base.__dict__:
                            # An attribute explicitly set to None counts as
                            # "not implemented".
                            if base.__dict__[attr] is None:
                                return NotImplemented
                            break
                        annotations = getattr(base, '__annotations__', {})
                        # A bare annotation only satisfies the member when
                        # ``other`` is itself a protocol class.
                        if (isinstance(annotations, typing.Mapping) and
                                attr in annotations and
                                isinstance(other, _ProtocolMeta) and
                                other._is_protocol):
                            break
                    else:
                        # No class in the MRO provides this member.
                        return NotImplemented
                return True
            if '__subclasshook__' not in cls.__dict__:
                cls.__subclasshook__ = _proto_hook

            # We have nothing more to do for non-protocols.
            if not cls._is_protocol:
                return

            # Check consistency of bases.
            for base in cls.__bases__:
                if not (base in (object, typing.Generic) or
                        base.__module__ == 'collections.abc' and
                        base.__name__ in _PROTO_WHITELIST or
                        isinstance(base, _ProtocolMeta) and base._is_protocol):
                    raise TypeError('Protocols can only inherit from other'
                                    f' protocols, got {repr(base)}')
            cls.__init__ = _no_init
# 3.6
else:
    from typing import _next_in_mro, _type_check  # noqa

    # Installed as ``__init__`` on protocol classes so that protocols themselves
    # cannot be instantiated; non-protocol subclasses pass through silently.
    def _no_init(self, *args, **kwargs):
        if type(self)._is_protocol:
            raise TypeError('Protocols cannot be instantiated')

    class _ProtocolMeta(GenericMeta):
        """Internal metaclass for Protocol.

        This exists so Protocol classes can be generic without deriving
        from Generic.
        """
        def __new__(cls, name, bases, namespace,
                    tvars=None, args=None, origin=None, extra=None, orig_bases=None):
            # This is just a version copied from GenericMeta.__new__ that
            # includes "Protocol" special treatment. (Comments removed for brevity.)
            assert extra is None  # Protocols should not have extra
            if tvars is not None:
                assert origin is not None
                assert all(isinstance(t, typing.TypeVar) for t in tvars), tvars
            else:
                tvars = _type_vars(bases)
                gvars = None
                for base in bases:
                    if base is typing.Generic:
                        raise TypeError("Cannot inherit from plain Generic")
                    if (isinstance(base, GenericMeta) and
                            base.__origin__ in (typing.Generic, Protocol)):
                        if gvars is not None:
                            raise TypeError(
                                "Cannot inherit from Generic[...] or"
                                " Protocol[...] multiple times.")
                        gvars = base.__parameters__
                if gvars is None:
                    gvars = tvars
                else:
                    tvarset = set(tvars)
                    gvarset = set(gvars)
                    if not tvarset <= gvarset:
                        s_vars = ", ".join(str(t) for t in tvars if t not in gvarset)
                        s_args = ", ".join(str(g) for g in gvars)
                        cls_name = "Generic" if any(b.__origin__ is typing.Generic
                                                    for b in bases) else "Protocol"
                        raise TypeError(f"Some type variables ({s_vars}) are"
                                        f" not listed in {cls_name}[{s_args}]")
                    tvars = gvars

            initial_bases = bases
            # NOTE(review): ``extra`` was asserted to be None above, so this
            # branch looks unreachable here; it mirrors the copied original.
            if (extra is not None and type(extra) is abc.ABCMeta and
                    extra not in bases):
                bases = (extra,) + bases
            bases = tuple(_gorg(b) if isinstance(b, GenericMeta) else b
                          for b in bases)
            if any(isinstance(b, GenericMeta) and b is not typing.Generic for b in bases):
                bases = tuple(b for b in bases if b is not typing.Generic)
            namespace.update({'__origin__': origin, '__extra__': extra})
            # Deliberately skips GenericMeta.__new__ and calls the next
            # implementation in the metaclass MRO.
            self = super(GenericMeta, cls).__new__(cls, name, bases, namespace,
                                                   _root=True)
            super(GenericMeta, self).__setattr__('_gorg',
                                                 self if not origin else
                                                 _gorg(origin))
            self.__parameters__ = tvars
            self.__args__ = tuple(... if a is typing._TypingEllipsis else
                                  () if a is typing._TypingEmpty else
                                  a for a in args) if args else None
            self.__next_in_mro__ = _next_in_mro(self)
            if orig_bases is None:
                self.__orig_bases__ = initial_bases
            elif origin is not None:
                # Subscripted forms share the ABC registry/cache of their origin.
                self._abc_registry = origin._abc_registry
                self._abc_cache = origin._abc_cache
            if hasattr(self, '_subs_tree'):
                self.__tree_hash__ = (hash(self._subs_tree()) if origin else
                                      super(GenericMeta, self).__hash__())
            return self

        def __init__(cls, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # A class is a protocol when it sets ``_is_protocol`` itself or
            # directly lists Protocol / Protocol[...] among its bases.
            if not cls.__dict__.get('_is_protocol', None):
                cls._is_protocol = any(b is Protocol or
                                       isinstance(b, _ProtocolMeta) and
                                       b.__origin__ is Protocol
                                       for b in cls.__bases__)
            if cls._is_protocol:
                # Every ancestor of a protocol must itself be acceptable.
                for base in cls.__mro__[1:]:
                    if not (base in (object, typing.Generic) or
                            base.__module__ == 'collections.abc' and
                            base.__name__ in _PROTO_WHITELIST or
                            isinstance(base, typing.TypingMeta) and base._is_protocol or
                            isinstance(base, GenericMeta) and
                            base.__origin__ is typing.Generic):
                        raise TypeError(f'Protocols can only inherit from other'
                                        f' protocols, got {repr(base)}')

                cls.__init__ = _no_init

            # Structural issubclass() hook; the runtime-protocol gating for
            # this branch lives in __subclasscheck__ below.
            def _proto_hook(other):
                if not cls.__dict__.get('_is_protocol', None):
                    return NotImplemented
                if not isinstance(other, type):
                    # Same error as for issubclass(1, int)
                    raise TypeError('issubclass() arg 1 must be a class')
                for attr in _get_protocol_attrs(cls):
                    for base in other.__mro__:
                        if attr in base.__dict__:
                            # An attribute explicitly set to None counts as
                            # "not implemented".
                            if base.__dict__[attr] is None:
                                return NotImplemented
                            break
                        annotations = getattr(base, '__annotations__', {})
                        if (isinstance(annotations, typing.Mapping) and
                                attr in annotations and
                                isinstance(other, _ProtocolMeta) and
                                other._is_protocol):
                            break
                    else:
                        return NotImplemented
                return True
            if '__subclasshook__' not in cls.__dict__:
                cls.__subclasshook__ = _proto_hook

        def __instancecheck__(self, instance):
            # We need this method for situations where attributes are
            # assigned in __init__.
            if ((not getattr(self, '_is_protocol', False) or
                 _is_callable_members_only(self)) and
                    issubclass(instance.__class__, self)):
                return True
            if self._is_protocol:
                # Structural check on the instance itself (see the class-level
                # member collection in _get_protocol_attrs).
                if all(hasattr(instance, attr) and
                       (not callable(getattr(self, attr, None)) or
                        getattr(instance, attr) is not None)
                       for attr in _get_protocol_attrs(self)):
                    return True
            return super(GenericMeta, self).__instancecheck__(instance)

        def __subclasscheck__(self, cls):
            # Each guard below inspects the calling module: checks issued from
            # the listed stdlib modules get a quiet answer, others a TypeError.
            if self.__origin__ is not None:
                if sys._getframe(1).f_globals['__name__'] not in ['abc', 'functools']:
                    raise TypeError("Parameterized generics cannot be used with class "
                                    "or instance checks")
                return False
            if (self.__dict__.get('_is_protocol', None) and
                    not self.__dict__.get('_is_runtime_protocol', None)):
                if sys._getframe(1).f_globals['__name__'] in ['abc',
                                                              'functools',
                                                              'typing']:
                    return False
                raise TypeError("Instance and class checks can only be used with"
                                " @runtime protocols")
            if (self.__dict__.get('_is_runtime_protocol', None) and
                    not _is_callable_members_only(self)):
                if sys._getframe(1).f_globals['__name__'] in ['abc',
                                                              'functools',
                                                              'typing']:
                    return super(GenericMeta, self).__subclasscheck__(cls)
                raise TypeError("Protocols with non-method members"
                                " don't support issubclass()")
            return super(GenericMeta, self).__subclasscheck__(cls)

        @typing._tp_cache
        def __getitem__(self, params):
            # We also need to copy this from GenericMeta.__getitem__ to get
            # special treatment of "Protocol". (Comments removed for brevity.)
            if not isinstance(params, tuple):
                params = (params,)
            if not params and _gorg(self) is not typing.Tuple:
                raise TypeError(
                    f"Parameter list to {self.__qualname__}[...] cannot be empty")
            msg = "Parameters to generic types must be types."
            params = tuple(_type_check(p, msg) for p in params)
            if self in (typing.Generic, Protocol):
                if not all(isinstance(p, typing.TypeVar) for p in params):
                    raise TypeError(
                        f"Parameters to {repr(self)}[...] must all be type variables")
                if len(set(params)) != len(params):
                    raise TypeError(
                        f"Parameters to {repr(self)}[...] must all be unique")
                tvars = params
                args = params
            elif self in (typing.Tuple, typing.Callable):
                tvars = _type_vars(params)
                args = params
            elif self.__origin__ in (typing.Generic, Protocol):
                raise TypeError(f"Cannot subscript already-subscripted {repr(self)}")
            else:
                _check_generic(self, params, len(self.__parameters__))
                tvars = _type_vars(params)
                args = params

            # The subscripted form is a new class created by this metaclass;
            # an unsubscripted origin is prepended to its bases.
            prepend = (self,) if self.__origin__ is None else ()
            return self.__class__(self.__name__,
                                  prepend + self.__bases__,
                                  _no_slots_copy(self.__dict__),
                                  tvars=tvars,
                                  args=args,
                                  origin=self,
                                  extra=self.__extra__,
                                  orig_bases=self.__orig_bases__)

    class Protocol(metaclass=_ProtocolMeta):
        """Base class for protocol classes. Protocol classes are defined as::

          class Proto(Protocol):
              def meth(self) -> int:
                  ...

        Such classes are primarily used with static type checkers that recognize
        structural subtyping (static duck-typing), for example::

          class C:
              def meth(self) -> int:
                  return 0

          def func(x: Proto) -> int:
              return x.meth()

          func(C())  # Passes static type check

        See PEP 544 for details. Protocol classes decorated with
        @typing_extensions.runtime act as simple-minded runtime protocol that checks
        only the presence of given attributes, ignoring their type signatures.

        Protocol classes can be generic, they are defined as::

          class GenProto(Protocol[T]):
              def meth(self) -> T:
                  ...
        """
        __slots__ = ()
        _is_protocol = True

        def __new__(cls, *args, **kwds):
            # Protocol itself (in any subscripted form) cannot be instantiated.
            if _gorg(cls) is Protocol:
                raise TypeError("Type Protocol cannot be instantiated; "
                                "it can be used only as a base class")
            return typing._generic_new(cls.__next_in_mro__, cls, *args, **kwds)
if not hasattr(typing, 'runtime_checkable'):
    # 3.6-3.7
    def runtime_checkable(cls):
        """Class decorator that opts a protocol class into runtime checks.

        After decoration the protocol may be used with isinstance() and
        issubclass(); the check is a simple-minded structural one, much like
        the one-offs in collections.abc such as Hashable.

        Raises TypeError when ``cls`` is not a protocol class.
        """
        is_protocol_class = isinstance(cls, _ProtocolMeta) and cls._is_protocol
        if not is_protocol_class:
            raise TypeError('@runtime_checkable can be only applied to protocol classes,'
                            f' got {cls!r}')
        cls._is_runtime_protocol = True
        return cls
else:
    # 3.8+
    runtime_checkable = typing.runtime_checkable


# Exists for backwards compatibility.
runtime = runtime_checkable
if not hasattr(typing, 'SupportsIndex'):
    # 3.6-3.7: runtime-checkable protocol with a single abstract __index__.
    @runtime_checkable
    class SupportsIndex(Protocol):
        __slots__ = ()

        @abc.abstractmethod
        def __index__(self) -> int:
            pass
else:
    # 3.8+
    SupportsIndex = typing.SupportsIndex
# TypedDict, its metaclass and is_typeddict are re-exported from typing only
# when typing also provides ``Required``; otherwise a backport is defined.
if hasattr(typing, "Required"):
    # The standard library TypedDict in Python 3.8 does not store runtime information
    # about which (if any) keys are optional.  See https://bugs.python.org/issue38834
    # The standard library TypedDict in Python 3.9.0/1 does not honour the "total"
    # keyword with old-style TypedDict().  See https://bugs.python.org/issue42059
    # The standard library TypedDict below Python 3.11 does not store runtime
    # information about optional and required keys when using Required or NotRequired.
    TypedDict = typing.TypedDict
    _TypedDictMeta = typing._TypedDictMeta
    is_typeddict = typing.is_typeddict
else:
    # Shared implementation of __instancecheck__/__subclasscheck__ on the
    # metaclass: raises TypeError unless the caller's module is abc, functools
    # or typing, in which case it answers False.
    def _check_fails(cls, other):
        try:
            if sys._getframe(1).f_globals['__name__'] not in ['abc',
                                                              'functools',
                                                              'typing']:
                # Typed dicts are only for static structural subtyping.
                raise TypeError('TypedDict does not support instance and class checks')
        except (AttributeError, ValueError):
            # Frame inspection unavailable: fall through and answer False.
            pass
        return False

    # __new__ for classes derived from TypedDict: drops the class argument and
    # returns a plain dict built from the remaining arguments.
    def _dict_new(*args, **kwargs):
        if not args:
            raise TypeError('TypedDict.__new__(): not enough arguments')
        _, args = args[0], args[1:]  # allow the "cls" keyword be passed
        return dict(*args, **kwargs)

    _dict_new.__text_signature__ = '($cls, _typename, _fields=None, /, **kwargs)'

    # __new__ for TypedDict itself (functional syntax): builds a new typed dict
    # class from a type name plus either a fields mapping or keyword arguments.
    def _typeddict_new(*args, total=True, **kwargs):
        if not args:
            raise TypeError('TypedDict.__new__(): not enough arguments')
        _, args = args[0], args[1:]  # allow the "cls" keyword be passed
        if args:
            typename, args = args[0], args[1:]  # allow the "_typename" keyword be passed
        elif '_typename' in kwargs:
            typename = kwargs.pop('_typename')
            import warnings
            warnings.warn("Passing '_typename' as keyword argument is deprecated",
                          DeprecationWarning, stacklevel=2)
        else:
            raise TypeError("TypedDict.__new__() missing 1 required positional "
                            "argument: '_typename'")
        if args:
            try:
                fields, = args  # allow the "_fields" keyword be passed
            except ValueError:
                raise TypeError('TypedDict.__new__() takes from 2 to 3 '
                                f'positional arguments but {len(args) + 2} '
                                'were given')
        elif '_fields' in kwargs and len(kwargs) == 1:
            fields = kwargs.pop('_fields')
            import warnings
            warnings.warn("Passing '_fields' as keyword argument is deprecated",
                          DeprecationWarning, stacklevel=2)
        else:
            fields = None

        # Without an explicit mapping, the keyword arguments are the fields.
        if fields is None:
            fields = kwargs
        elif kwargs:
            raise TypeError("TypedDict takes either a dict or keyword arguments,"
                            " but not both")

        ns = {'__annotations__': dict(fields)}
        try:
            # Setting correct module is necessary to make typed dict classes pickleable.
            ns['__module__'] = sys._getframe(1).f_globals.get('__name__', '__main__')
        except (AttributeError, ValueError):
            pass

        return _TypedDictMeta(typename, (), ns, total=total)

    _typeddict_new.__text_signature__ = ('($cls, _typename, _fields=None,'
                                         ' /, *, total=True, **kwargs)')

    class _TypedDictMeta(type):
        # Accepts (and discards) ``total`` so that the class keyword does not
        # reach type.__init__.
        def __init__(cls, name, bases, ns, total=True):
            super().__init__(name, bases, ns)

        def __new__(cls, name, bases, ns, total=True):
            # Create new typed dict class object.
            # This method is called directly when TypedDict is subclassed,
            # or via _typeddict_new when TypedDict is instantiated. This way
            # TypedDict supports all three syntaxes described in its docstring.
            # Subclasses and instances of TypedDict return actual dictionaries
            # via _dict_new.
            ns['__new__'] = _typeddict_new if name == 'TypedDict' else _dict_new
            # The created class always has ``dict`` as its only base; the
            # declared bases are used below only to inherit key metadata.
            tp_dict = super().__new__(cls, name, (dict,), ns)

            annotations = {}
            own_annotations = ns.get('__annotations__', {})
            msg = "TypedDict('Name', {f0: t0, f1: t1, ...}); each t must be a type"
            own_annotations = {
                n: typing._type_check(tp, msg) for n, tp in own_annotations.items()
            }
            required_keys = set()
            optional_keys = set()

            # Inherit annotations and key sets from the declared bases.
            for base in bases:
                annotations.update(base.__dict__.get('__annotations__', {}))
                required_keys.update(base.__dict__.get('__required_keys__', ()))
                optional_keys.update(base.__dict__.get('__optional_keys__', ()))

            annotations.update(own_annotations)
            if PEP_560:
                for annotation_key, annotation_type in own_annotations.items():
                    annotation_origin = get_origin(annotation_type)
                    # Look through one level of Annotated[...] to find a
                    # Required/NotRequired wrapper underneath.
                    if annotation_origin is Annotated:
                        annotation_args = get_args(annotation_type)
                        if annotation_args:
                            annotation_type = annotation_args[0]
                            annotation_origin = get_origin(annotation_type)

                    # Explicit markers win; otherwise ``total`` decides.
                    if annotation_origin is Required:
                        required_keys.add(annotation_key)
                    elif annotation_origin is NotRequired:
                        optional_keys.add(annotation_key)
                    elif total:
                        required_keys.add(annotation_key)
                    else:
                        optional_keys.add(annotation_key)
            else:
                own_annotation_keys = set(own_annotations.keys())
                if total:
                    required_keys.update(own_annotation_keys)
                else:
                    optional_keys.update(own_annotation_keys)

            tp_dict.__annotations__ = annotations
            tp_dict.__required_keys__ = frozenset(required_keys)
            tp_dict.__optional_keys__ = frozenset(optional_keys)
            if not hasattr(tp_dict, '__total__'):
                tp_dict.__total__ = total
            return tp_dict

        __instancecheck__ = __subclasscheck__ = _check_fails

    TypedDict = _TypedDictMeta('TypedDict', (dict,), {})
    TypedDict.__module__ = __name__
    TypedDict.__doc__ = \
        """A simple typed name space. At runtime it is equivalent to a plain dict.
TypedDict creates a dictionary type that expects all of its
instances to have a certain set of keys, with each key
associated with a value of a consistent type. This expectation
is not checked at runtime but is only enforced by type checkers.
Usage::
class Point2D(TypedDict):
x: int
y: int
label: str
a: Point2D = {'x': 1, 'y': 2, 'label': 'good'} # OK
b: Point2D = {'z': 3, 'label': 'bad'} # Fails type check
assert Point2D(x=1, y=2, label='first') == dict(x=1, y=2, label='first')
The type info can be accessed via the Point2D.__annotations__ dict, and
the Point2D.__required_keys__ and Point2D.__optional_keys__ frozensets.
TypedDict supports two additional equivalent forms::
Point2D = TypedDict('Point2D', x=int, y=int, label=str)
Point2D = TypedDict('Point2D', {'x': int, 'y': int, 'label': str})
The class syntax is only supported in Python 3.6+, while two other
syntax forms work for Python 2.7 and 3.2+
"""

    # Metaclasses recognised by is_typeddict(): the backport's, plus typing's
    # own when the running interpreter has one.
    if hasattr(typing, "_TypedDictMeta"):
        _TYPEDDICT_TYPES = (typing._TypedDictMeta, _TypedDictMeta)
    else:
        _TYPEDDICT_TYPES = (_TypedDictMeta,)

    def is_typeddict(tp):
        """Check if an annotation is a TypedDict class

        For example::
            class Film(TypedDict):
                title: str
                year: int

            is_typeddict(Film)  # => True
            is_typeddict(Union[list, str])  # => False
        """
        return isinstance(tp, tuple(_TYPEDDICT_TYPES))
if hasattr(typing, "Required"):
get_type_hints = typing.get_type_hints
elif PEP_560:
import functools
import types
# replaces _strip_annotations()
def _strip_extras(t):
"""Strips Annotated, Required and NotRequired from a given type."""
if isinstance(t, _AnnotatedAlias):
return _strip_extras(t.__origin__)
if hasattr(t, "__origin__") and t.__origin__ in (Required, NotRequired):
return _strip_extras(t.__args__[0])
if isinstance(t, typing._GenericAlias):
stripped_args = tuple(_strip_extras(a) for a in t.__args__)
if stripped_args == t.__args__:
return t
return t.copy_with(stripped_args)
if hasattr(types, "GenericAlias") and isinstance(t, types.GenericAlias):
stripped_args = tuple(_strip_extras(a) for a in t.__args__)
if stripped_args == t.__args__:
return t
return types.GenericAlias(t.__origin__, stripped_args)
if hasattr(types, "UnionType") and isinstance(t, types.UnionType):
stripped_args = tuple(_strip_extras(a) for a in t.__args__)
if stripped_args == t.__args__:
return t
return functools.reduce(operator.or_, stripped_args)
return t
def get_type_hints(obj, globalns=None, localns=None, include_extras=False):
"""Return type hints for an object.
This is often the same as obj.__annotations__, but it handles
forward references encoded as string literals, adds Optional[t] if a
default value equal to None is set and recursively replaces all
'Annotated[T, ...]', 'Required[T]' or 'NotRequired[T]' with 'T'
(unless 'include_extras=True').
The argument may be a module, class, method, or function. The annotations
are returned as a dictionary. For classes, annotations include also
inherited members.
TypeError is raised if the argument is not of a type that can contain
annotations, and an empty dictionary is returned if no annotations are
present.
BEWARE -- the behavior of globalns and localns is counterintuitive
(unless you are familiar with how eval() and exec() work). The
search order is locals first, then globals.
- If no dict arguments are passed, an attempt is made to use the
globals from obj (or the respective module's globals for classes),
and these are also used as the locals. If the object does not appear
to have globals, an empty dictionary is used.
- If one dict argument is passed, it is used for both globals and
locals.
- If two dict arguments are passed, they specify globals and
locals, respectively.
"""
if hasattr(typing, "Annotated"):
hint = typing.get_type_hints(
obj, globalns=globalns, localns=localns, include_extras=True
)
else:
hint = typing.get_type_hints(obj, globalns=globalns, localns=localns)
if include_extras:
return hint
return {k: _strip_extras(t) for k, t in hint.items()}
# Python 3.9+ has PEP 593 (Annotated)
if hasattr(typing, 'Annotated'):
Annotated = typing.Annotated
# Not exported and not a public API, but needed for get_origin() and get_args()
# to work.
_AnnotatedAlias = typing._AnnotatedAlias
# 3.7-3.8
elif PEP_560:
class _AnnotatedAlias(typing._GenericAlias, _root=True):
"""Runtime representation of an annotated type.
At its core 'Annotated[t, dec1, dec2, ...]' is an alias for the type 't'
with extra annotations. The alias behaves like a normal typing alias,
instantiating is the same as instantiating the underlying type, binding
it to types is also the same.
"""
def __init__(self, origin, metadata):
if isinstance(origin, _AnnotatedAlias):
metadata = origin.__metadata__ + metadata
origin = origin.__origin__
super().__init__(origin, origin)
self.__metadata__ = metadata
def copy_with(self, params):
assert len(params) == 1
new_type = params[0]
return _AnnotatedAlias(new_type, self.__metadata__)
def __repr__(self):
return (f"typing_extensions.Annotated[{typing._type_repr(self.__origin__)}, "
f"{', '.join(repr(a) for a in self.__metadata__)}]")
def __reduce__(self):
return operator.getitem, (
Annotated, (self.__origin__,) + self.__metadata__
)
def __eq__(self, other):
if not isinstance(other, _AnnotatedAlias):
return NotImplemented
if self.__origin__ != other.__origin__:
return False
return self.__metadata__ == other.__metadata__
def __hash__(self):
return hash((self.__origin__, self.__metadata__))
class Annotated:
"""Add context specific metadata to a type.
Example: Annotated[int, runtime_check.Unsigned] indicates to the
hypothetical runtime_check module that this type is an unsigned int.
Every other consumer of this type can ignore this metadata and treat
this type as int.
The first argument to Annotated must be a valid type (and will be in
the __origin__ field), the remaining arguments are kept as a tuple in
the __extra__ field.
Details:
- It's an error to call `Annotated` with less than two arguments.
- Nested Annotated are flattened::
Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3]
- Instantiating an annotated type is equivalent to instantiating the
underlying type::
Annotated[C, Ann1](5) == C(5)
- Annotated can be used as a generic type alias::
Optimized = Annotated[T, runtime.Optimize()]
Optimized[int] == Annotated[int, runtime.Optimize()]
OptimizedList = Annotated[List[T], runtime.Optimize()]
OptimizedList[int] == Annotated[List[int], runtime.Optimize()]
"""
__slots__ = ()
def __new__(cls, *args, **kwargs):
raise TypeError("Type Annotated cannot be instantiated.")
@typing._tp_cache
def __class_getitem__(cls, params):
if not isinstance(params, tuple) or len(params) < 2:
raise TypeError("Annotated[...] should be used "
"with at least two arguments (a type and an "
"annotation).")
allowed_special_forms = (ClassVar, Final)
if get_origin(params[0]) in allowed_special_forms:
origin = params[0]
else:
msg = "Annotated[t, ...]: t must be a type."
origin = typing._type_check(params[0], msg)
metadata = tuple(params[1:])
return _AnnotatedAlias(origin, metadata)
def __init_subclass__(cls, *args, **kwargs):
raise TypeError(
f"Cannot subclass {cls.__module__}.Annotated"
)
# 3.6
else:
def _is_dunder(name):
"""Returns True if name is a __dunder_variable_name__."""
return len(name) > 4 and name.startswith('__') and name.endswith('__')
# Prior to Python 3.7 types did not have `copy_with`. A lot of the equality
# checks, argument expansion etc. are done on the _subs_tre. As a result we
# can't provide a get_type_hints function that strips out annotations.
class AnnotatedMeta(typing.GenericMeta):
"""Metaclass for Annotated"""
def __new__(cls, name, bases, namespace, **kwargs):
if any(b is not object for b in bases):
raise TypeError("Cannot subclass " + str(Annotated))
return super().__new__(cls, name, bases, namespace, **kwargs)
@property
def __metadata__(self):
return self._subs_tree()[2]
def _tree_repr(self, tree):
cls, origin, metadata = tree
if not isinstance(origin, tuple):
tp_repr = typing._type_repr(origin)
else:
tp_repr = origin[0]._tree_repr(origin)
metadata_reprs = ", ".join(repr(arg) for arg in metadata)
return f'{cls}[{tp_repr}, {metadata_reprs}]'
def _subs_tree(self, tvars=None, args=None): # noqa
if self is Annotated:
return Annotated
res = super()._subs_tree(tvars=tvars, args=args)
# Flatten nested Annotated
if isinstance(res[1], tuple) and res[1][0] is Annotated:
sub_tp = res[1][1]
sub_annot = res[1][2]
return (Annotated, sub_tp, sub_annot + res[2])
return res
def _get_cons(self):
"""Return the class used to create instance of this type."""
if self.__origin__ is None:
raise TypeError("Cannot get the underlying type of a "
"non-specialized Annotated type.")
tree = self._subs_tree()
while isinstance(tree, tuple) and tree[0] is Annotated:
tree = tree[1]
if isinstance(tree, tuple):
return tree[0]
else:
return tree
@typing._tp_cache
def __getitem__(self, params):
if not isinstance(params, tuple):
params = (params,)
if self.__origin__ is not None: # specializing an instantiated type
return super().__getitem__(params)
elif not isinstance(params, tuple) or len(params) < 2:
raise TypeError("Annotated[...] should be instantiated "
"with at least two arguments (a type and an "
"annotation).")
else:
if (
isinstance(params[0], typing._TypingBase) and
type(params[0]).__name__ == "_ClassVar"
):
tp = params[0]
else:
msg = "Annotated[t, ...]: t must be a type."
tp = typing._type_check(params[0], msg)
metadata = tuple(params[1:])
return self.__class__(
self.__name__,
self.__bases__,
_no_slots_copy(self.__dict__),
tvars=_type_vars((tp,)),
# Metadata is a tuple so it won't be touched by _replace_args et al.
args=(tp, metadata),
origin=self,
)
def __call__(self, *args, **kwargs):
cons = self._get_cons()
result = cons(*args, **kwargs)
try:
result.__orig_class__ = self
except AttributeError:
pass
return result
def __getattr__(self, attr):
# For simplicity we just don't relay all dunder names
if self.__origin__ is not None and not _is_dunder(attr):
return getattr(self._get_cons(), attr)
raise AttributeError(attr)
def __setattr__(self, attr, value):
if _is_dunder(attr) or attr.startswith('_abc_'):
super().__setattr__(attr, value)
elif self.__origin__ is None:
raise AttributeError(attr)
else:
setattr(self._get_cons(), attr, value)
def __instancecheck__(self, obj):
raise TypeError("Annotated cannot be used with isinstance().")
def __subclasscheck__(self, cls):
raise TypeError("Annotated cannot be used with issubclass().")
class Annotated(metaclass=AnnotatedMeta):
    """Add context specific metadata to a type.

    Example: Annotated[int, runtime_check.Unsigned] indicates to the
    hypothetical runtime_check module that this type is an unsigned int.
    Every other consumer of this type can ignore this metadata and treat
    this type as int.

    The first argument to Annotated must be a valid type, the remaining
    arguments are kept as a tuple in the __metadata__ field.

    Details:

    - It's an error to call `Annotated` with fewer than two arguments.
    - Nested Annotated are flattened::

        Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3]

    - Instantiating an annotated type is equivalent to instantiating the
      underlying type::

        Annotated[C, Ann1](5) == C(5)

    - Annotated can be used as a generic type alias::

        Optimized = Annotated[T, runtime.Optimize()]
        Optimized[int] == Annotated[int, runtime.Optimize()]

        OptimizedList = Annotated[List[T], runtime.Optimize()]
        OptimizedList[int] == Annotated[List[int], runtime.Optimize()]
    """
# Python 3.8 has get_origin() and get_args() but those implementations aren't
# Annotated-aware, so we can't use those. Python 3.9's versions don't support
# ParamSpecArgs and ParamSpecKwargs, so only Python 3.10's versions will do.
if sys.version_info[:2] >= (3, 10):
get_origin = typing.get_origin
get_args = typing.get_args
# 3.7-3.9
elif PEP_560:
try:
# 3.9+
from typing import _BaseGenericAlias
except ImportError:
_BaseGenericAlias = typing._GenericAlias
try:
# 3.9+
from typing import GenericAlias
except ImportError:
GenericAlias = typing._GenericAlias
def get_origin(tp):
"""Get the unsubscripted version of a type.
This supports generic types, Callable, Tuple, Union, Literal, Final, ClassVar
and Annotated. Return None for unsupported types. Examples::
get_origin(Literal[42]) is Literal
get_origin(int) is None
get_origin(ClassVar[int]) is ClassVar
get_origin(Generic) is Generic
get_origin(Generic[T]) is Generic
get_origin(Union[T, int]) is Union
get_origin(List[Tuple[T, T]][int]) == list
get_origin(P.args) is P
"""
if isinstance(tp, _AnnotatedAlias):
return Annotated
if isinstance(tp, (typing._GenericAlias, GenericAlias, _BaseGenericAlias,
ParamSpecArgs, ParamSpecKwargs)):
return tp.__origin__
if tp is typing.Generic:
return typing.Generic
return None
def get_args(tp):
"""Get type arguments with all substitutions performed.
For unions, basic simplifications used by Union constructor are performed.
Examples::
get_args(Dict[str, int]) == (str, int)
get_args(int) == ()
get_args(Union[int, Union[T, int], str][int]) == (int, str)
get_args(Union[int, Tuple[T, int]][str]) == (int, Tuple[str, int])
get_args(Callable[[], T][int]) == ([], int)
"""
if isinstance(tp, _AnnotatedAlias):
return (tp.__origin__,) + tp.__metadata__
if isinstance(tp, (typing._GenericAlias, GenericAlias)):
if getattr(tp, "_special", False):
return ()
res = tp.__args__
if get_origin(tp) is collections.abc.Callable and res[0] is not Ellipsis:
res = (list(res[:-1]), res[-1])
return res
return ()
# 3.10+
if hasattr(typing, 'TypeAlias'):
    TypeAlias = typing.TypeAlias
# 3.9
elif sys.version_info[:2] >= (3, 9):
    class _TypeAliasForm(typing._SpecialForm, _root=True):
        def __repr__(self):
            return 'typing_extensions.{}'.format(self._name)

    @_TypeAliasForm
    def TypeAlias(self, parameters):
        """Special marker indicating that an assignment should
        be recognized as a proper type alias definition by type
        checkers.

        For example::

            Predicate: TypeAlias = Callable[..., bool]

        It's invalid when used anywhere except as in the example above.
        """
        raise TypeError("{} is not subscriptable".format(self))
# 3.7-3.8
elif sys.version_info[:2] >= (3, 7):
    class _TypeAliasForm(typing._SpecialForm, _root=True):
        def __repr__(self):
            return 'typing_extensions.{}'.format(self._name)

    # The doc text is a runtime value; its continuation lines are kept
    # flush-left so the string stays exactly as it was.
    TypeAlias = _TypeAliasForm(
        'TypeAlias',
        doc="""Special marker indicating that an assignment should
be recognized as a proper type alias definition by type
checkers.
For example::
Predicate: TypeAlias = Callable[..., bool]
It's invalid when used anywhere except as in the example
above.""")
# 3.6
else:
    class _TypeAliasMeta(typing.TypingMeta):
        """Metaclass for TypeAlias"""

        def __repr__(self):
            return 'typing_extensions.TypeAlias'

    class _TypeAliasBase(typing._FinalTypingBase, metaclass=_TypeAliasMeta, _root=True):
        """Special marker indicating that an assignment should
        be recognized as a proper type alias definition by type
        checkers.

        For example::

            Predicate: TypeAlias = Callable[..., bool]

        It's invalid when used anywhere except as in the example above.
        """
        __slots__ = ()

        def __instancecheck__(self, obj):
            raise TypeError("TypeAlias cannot be used with isinstance().")

        def __subclasscheck__(self, cls):
            raise TypeError("TypeAlias cannot be used with issubclass().")

        def __repr__(self):
            return 'typing_extensions.TypeAlias'

    TypeAlias = _TypeAliasBase(_root=True)
# Python 3.10+ has PEP 612
if hasattr(typing, 'ParamSpecArgs'):
    ParamSpecArgs = typing.ParamSpecArgs
    ParamSpecKwargs = typing.ParamSpecKwargs
# 3.6-3.9
else:
    class _Immutable:
        """Mixin to indicate that object should not be copied."""
        __slots__ = ()

        def __copy__(self):
            return self

        def __deepcopy__(self, memo):
            return self

    class ParamSpecArgs(_Immutable):
        """The args for a ParamSpec object.

        Given a ParamSpec object P, P.args is an instance of ParamSpecArgs.

        ParamSpecArgs objects have a reference back to their ParamSpec:

            P.args.__origin__ is P

        This type is meant for runtime introspection and has no special
        meaning to static type checkers.
        """

        def __init__(self, origin):
            self.__origin__ = origin

        def __repr__(self):
            return "{}.args".format(self.__origin__.__name__)

        def __eq__(self, other):
            # Only another ParamSpecArgs can be equal; compare by origin.
            if isinstance(other, ParamSpecArgs):
                return self.__origin__ == other.__origin__
            return NotImplemented

    class ParamSpecKwargs(_Immutable):
        """The kwargs for a ParamSpec object.

        Given a ParamSpec object P, P.kwargs is an instance of
        ParamSpecKwargs.

        ParamSpecKwargs objects have a reference back to their ParamSpec:

            P.kwargs.__origin__ is P

        This type is meant for runtime introspection and has no special
        meaning to static type checkers.
        """

        def __init__(self, origin):
            self.__origin__ = origin

        def __repr__(self):
            return "{}.kwargs".format(self.__origin__.__name__)

        def __eq__(self, other):
            # Only another ParamSpecKwargs can be equal; compare by origin.
            if isinstance(other, ParamSpecKwargs):
                return self.__origin__ == other.__origin__
            return NotImplemented
# 3.10+
if hasattr(typing, 'ParamSpec'):
ParamSpec = typing.ParamSpec
# 3.6-3.9
else:
# Inherits from list as a workaround for Callable checks in Python < 3.9.2.
class ParamSpec(list):
"""Parameter specification variable.
Usage::
P = ParamSpec('P')
Parameter specification variables exist primarily for the benefit of static
type checkers. They are used to forward the parameter types of one
callable to another callable, a pattern commonly found in higher order
functions and decorators. They are only valid when used in ``Concatenate``,
or s the first argument to ``Callable``. In Python 3.10 and higher,
they are also supported in user-defined Generics at runtime.
See class Generic for more information on generic types. An
example for annotating a decorator::
T = TypeVar('T')
P = ParamSpec('P')
def add_logging(f: Callable[P, T]) -> Callable[P, T]:
'''A type-safe decorator to add logging to a function.'''
def inner(*args: P.args, **kwargs: P.kwargs) -> T:
logging.info(f'{f.__name__} was called')
return f(*args, **kwargs)
return inner
@add_logging
def add_two(x: float, y: float) -> float:
'''Add two numbers together.'''
return x + y
Parameter specification variables defined with covariant=True or
contravariant=True can be used to declare covariant or contravariant
generic types. These keyword arguments are valid, but their actual semantics
are yet to be decided. See PEP 612 for details.
Parameter specification variables can be introspected. e.g.:
P.__name__ == 'T'
P.__bound__ == None
P.__covariant__ == False
P.__contravariant__ == False
Note that only parameter specification variables defined in global scope can
be pickled.
"""
# Trick Generic __parameters__.
__class__ = typing.TypeVar
@property
def args(self):
return ParamSpecArgs(self)
@property
def kwargs(self):
return ParamSpecKwargs(self)
def __init__(self, name, *, bound=None, covariant=False, contravariant=False):
super().__init__([self])
self.__name__ = name
self.__covariant__ = bool(covariant)
self.__contravariant__ = bool(contravariant)
if bound:
self.__bound__ = typing._type_check(bound, 'Bound must be a type.')
else:
self.__bound__ = None
# for pickling:
try:
def_mod = sys._getframe(1).f_globals.get('__name__', '__main__')
except (AttributeError, ValueError):
def_mod = None
if def_mod != 'typing_extensions':
self.__module__ = def_mod
def __repr__(self):
if self.__covariant__:
prefix = '+'
elif self.__contravariant__:
prefix = '-'
else:
prefix = '~'
return prefix + self.__name__
def __hash__(self):
return object.__hash__(self)
def __eq__(self, other):
return self is other
def __reduce__(self):
return self.__name__
# Hack to get typing._type_check to pass.
def __call__(self, *args, **kwargs):
pass
if not PEP_560:
# Only needed in 3.6.
def _get_type_vars(self, tvars):
if self not in tvars:
tvars.append(self)
# 3.6-3.9
if not hasattr(typing, 'Concatenate'):
    # Inherits from list as a workaround for Callable checks in Python < 3.9.2.
    class _ConcatenateGenericAlias(list):
        """Result of subscripting ``Concatenate`` on Python 3.6-3.9.

        Stores the special form in ``__origin__`` and the subscript tuple
        in ``__args__``.
        """

        # Trick Generic into looking into this for __parameters__.
        __class__ = typing._GenericAlias if PEP_560 else typing._TypingBase

        # Flag in 3.8.
        _special = False
        # Attribute in 3.6 and earlier.
        _gorg = typing.Generic

        def __init__(self, origin, args):
            super().__init__(args)
            self.__origin__ = origin
            self.__args__ = args

        def __repr__(self):
            render = typing._type_repr
            head = render(self.__origin__)
            inner = ", ".join(render(arg) for arg in self.__args__)
            return f'{head}[{inner}]'

        def __hash__(self):
            return hash((self.__origin__, self.__args__))

        # Hack to get typing._type_check to pass in Generic.
        def __call__(self, *args, **kwargs):
            pass

        @property
        def __parameters__(self):
            # Only TypeVar and ParamSpec arguments count as parameters.
            variable_kinds = (typing.TypeVar, ParamSpec)
            return tuple(
                arg for arg in self.__args__ if isinstance(arg, variable_kinds)
            )

        if not PEP_560:
            # Only required in 3.6.
            def _get_type_vars(self, tvars):
                if self.__origin__ and self.__parameters__:
                    typing._get_type_vars(self.__parameters__, tvars)
# 3.6-3.9
@typing._tp_cache
def _concatenate_getitem(self, parameters):
if parameters == ():
raise TypeError("Cannot take a Concatenate of no types.")
if not isinstance(parameters, tuple):
parameters = (parameters,)
if not isinstance(parameters[-1], ParamSpec):
raise TypeError("The last parameter to Concatenate should be a "
"ParamSpec variable.")
msg = "Concatenate[arg, ...]: each arg must be a type."
parameters = tuple(typing._type_check(p, msg) for p in parameters)
return _ConcatenateGenericAlias(self, parameters)
# 3.10+
if hasattr(typing, 'Concatenate'):
Concatenate = typing.Concatenate
_ConcatenateGenericAlias = typing._ConcatenateGenericAlias # noqa
# 3.9
elif sys.version_info[:2] >= (3, 9):
@_TypeAliasForm
def Concatenate(self, parameters):
"""Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a
higher order function which adds, removes or transforms parameters of a
callable.
For example::
Callable[Concatenate[int, P], int]
See PEP 612 for detailed information.
"""
return _concatenate_getitem(self, parameters)
# 3.7-8
elif sys.version_info[:2] >= (3, 7):
class _ConcatenateForm(typing._SpecialForm, _root=True):
def __repr__(self):
return 'typing_extensions.' + self._name
def __getitem__(self, parameters):
return _concatenate_getitem(self, parameters)
Concatenate = _ConcatenateForm(
'Concatenate',
doc="""Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a
higher order function which adds, removes or transforms parameters of a
callable.
For example::
Callable[Concatenate[int, P], int]
See PEP 612 for detailed information.
""")
# 3.6
else:
class _ConcatenateAliasMeta(typing.TypingMeta):
"""Metaclass for Concatenate."""
def __repr__(self):
return 'typing_extensions.Concatenate'
class _ConcatenateAliasBase(typing._FinalTypingBase,
metaclass=_ConcatenateAliasMeta,
_root=True):
"""Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a
higher order function which adds, removes or transforms parameters of a
callable.
For example::
Callable[Concatenate[int, P], int]
See PEP 612 for detailed information.
"""
__slots__ = ()
def __instancecheck__(self, obj):
raise TypeError("Concatenate cannot be used with isinstance().")
def __subclasscheck__(self, cls):
raise TypeError("Concatenate cannot be used with issubclass().")
def __repr__(self):
return 'typing_extensions.Concatenate'
def __getitem__(self, parameters):
return _concatenate_getitem(self, parameters)
Concatenate = _ConcatenateAliasBase(_root=True)
# 3.10+
if hasattr(typing, 'TypeGuard'):
    TypeGuard = typing.TypeGuard
# 3.9
elif sys.version_info[:2] >= (3, 9):
    class _TypeGuardForm(typing._SpecialForm, _root=True):
        def __repr__(self):
            return 'typing_extensions.{}'.format(self._name)

    @_TypeGuardForm
    def TypeGuard(self, parameters):
        """Special typing form used to annotate the return type of a
        user-defined type guard function.  ``TypeGuard`` only accepts a
        single type argument.  At runtime, functions marked this way should
        return a boolean.

        ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by
        static type checkers to determine a more precise type of an
        expression within a program's code flow.  Usually type narrowing is
        done by analyzing conditional code flow and applying the narrowing
        to a block of code.  The conditional expression here is sometimes
        referred to as a "type guard".

        Sometimes it would be convenient to use a user-defined boolean
        function as a type guard.  Such a function should use
        ``TypeGuard[...]`` as its return type to alert static type checkers
        to this intention.

        Using ``-> TypeGuard`` tells the static type checker that for a given
        function:

        1. The return value is a boolean.
        2. If the return value is ``True``, the type of its argument
           is the type inside ``TypeGuard``.

        For example::

            def is_str(val: Union[str, float]):
                # "isinstance" type guard
                if isinstance(val, str):
                    # Type of ``val`` is narrowed to ``str``
                    ...
                else:
                    # Else, type of ``val`` is narrowed to ``float``.
                    ...

        Strict type narrowing is not enforced -- ``TypeB`` need not be a
        narrower form of ``TypeA`` (it can even be a wider form) and this
        may lead to type-unsafe results.  The main reason is to allow for
        things like narrowing ``List[object]`` to ``List[str]`` even though
        the latter is not a subtype of the former, since ``List`` is
        invariant.  The responsibility of writing type-safe type guards is
        left to the user.

        ``TypeGuard`` also works with type variables.  For more information,
        see PEP 647 (User-Defined Type Guards).
        """
        checked = typing._type_check(parameters, f'{self} accepts only single type.')
        return typing._GenericAlias(self, (checked,))
# 3.7-3.8
elif sys.version_info[:2] >= (3, 7):
    class _TypeGuardForm(typing._SpecialForm, _root=True):

        def __repr__(self):
            return 'typing_extensions.{}'.format(self._name)

        def __getitem__(self, parameters):
            checked = typing._type_check(
                parameters, f'{self._name} accepts only a single type')
            return typing._GenericAlias(self, (checked,))

    # The doc text is a runtime value; its continuation lines are kept
    # flush-left so the string stays exactly as it was.
    TypeGuard = _TypeGuardForm(
        'TypeGuard',
        doc="""Special typing form used to annotate the return type of a user-defined
type guard function. ``TypeGuard`` only accepts a single type argument.
At runtime, functions marked this way should return a boolean.
``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static
type checkers to determine a more precise type of an expression within a
program's code flow. Usually type narrowing is done by analyzing
conditional code flow and applying the narrowing to a block of code. The
conditional expression here is sometimes referred to as a "type guard".
Sometimes it would be convenient to use a user-defined boolean function
as a type guard. Such a function should use ``TypeGuard[...]`` as its
return type to alert static type checkers to this intention.
Using ``-> TypeGuard`` tells the static type checker that for a given
function:
1. The return value is a boolean.
2. If the return value is ``True``, the type of its argument
is the type inside ``TypeGuard``.
For example::
def is_str(val: Union[str, float]):
# "isinstance" type guard
if isinstance(val, str):
# Type of ``val`` is narrowed to ``str``
...
else:
# Else, type of ``val`` is narrowed to ``float``.
...
Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower
form of ``TypeA`` (it can even be a wider form) and this may lead to
type-unsafe results. The main reason is to allow for things like
narrowing ``List[object]`` to ``List[str]`` even though the latter is not
a subtype of the former, since ``List`` is invariant. The responsibility of
writing type-safe type guards is left to the user.
``TypeGuard`` also works with type variables. For more information, see
PEP 647 (User-Defined Type Guards).
""")
# 3.6
else:
    class _TypeGuard(typing._FinalTypingBase, _root=True):
        """Special typing form used to annotate the return type of a
        user-defined type guard function.  ``TypeGuard`` only accepts a
        single type argument.  At runtime, functions marked this way should
        return a boolean.

        ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by
        static type checkers to determine a more precise type of an
        expression within a program's code flow.  Usually type narrowing is
        done by analyzing conditional code flow and applying the narrowing
        to a block of code.  The conditional expression here is sometimes
        referred to as a "type guard".

        Sometimes it would be convenient to use a user-defined boolean
        function as a type guard.  Such a function should use
        ``TypeGuard[...]`` as its return type to alert static type checkers
        to this intention.

        Using ``-> TypeGuard`` tells the static type checker that for a given
        function:

        1. The return value is a boolean.
        2. If the return value is ``True``, the type of its argument
           is the type inside ``TypeGuard``.

        For example::

            def is_str(val: Union[str, float]):
                # "isinstance" type guard
                if isinstance(val, str):
                    # Type of ``val`` is narrowed to ``str``
                    ...
                else:
                    # Else, type of ``val`` is narrowed to ``float``.
                    ...

        Strict type narrowing is not enforced -- ``TypeB`` need not be a
        narrower form of ``TypeA`` (it can even be a wider form) and this
        may lead to type-unsafe results.  The main reason is to allow for
        things like narrowing ``List[object]`` to ``List[str]`` even though
        the latter is not a subtype of the former, since ``List`` is
        invariant.  The responsibility of writing type-safe type guards is
        left to the user.

        ``TypeGuard`` also works with type variables.  For more information,
        see PEP 647 (User-Defined Type Guards).
        """

        __slots__ = ('__type__',)

        def __init__(self, tp=None, **kwds):
            self.__type__ = tp

        def __getitem__(self, item):
            cls = type(self)
            # An already-subscripted guard cannot be subscripted again.
            if self.__type__ is not None:
                raise TypeError(f'{cls.__name__[1:]} cannot be further subscripted')
            checked = typing._type_check(
                item, f'{cls.__name__[1:]} accepts only a single type.')
            return cls(checked, _root=True)

        def _eval_type(self, globalns, localns):
            resolved = typing._eval_type(self.__type__, globalns, localns)
            if resolved == self.__type__:
                return self
            return type(self)(resolved, _root=True)

        def __repr__(self):
            text = super().__repr__()
            if self.__type__ is None:
                return text
            return text + f'[{typing._type_repr(self.__type__)}]'

        def __hash__(self):
            return hash((type(self).__name__, self.__type__))

        def __eq__(self, other):
            if not isinstance(other, _TypeGuard):
                return NotImplemented
            if self.__type__ is None:
                return self is other
            return self.__type__ == other.__type__

    TypeGuard = _TypeGuard(_root=True)
if sys.version_info[:2] >= (3, 7):
# Vendored from cpython typing._SpecialFrom
class _SpecialForm(typing._Final, _root=True):
__slots__ = ('_name', '__doc__', '_getitem')
def __init__(self, getitem):
self._getitem = getitem
self._name = getitem.__name__
self.__doc__ = getitem.__doc__
def __getattr__(self, item):
if item in {'__name__', '__qualname__'}:
return self._name
raise AttributeError(item)
def __mro_entries__(self, bases):
raise TypeError(f"Cannot subclass {self!r}")
def __repr__(self):
return f'typing_extensions.{self._name}'
def __reduce__(self):
return self._name
def __call__(self, *args, **kwds):
raise TypeError(f"Cannot instantiate {self!r}")
def __or__(self, other):
return typing.Union[self, other]
def __ror__(self, other):
return typing.Union[other, self]
def __instancecheck__(self, obj):
raise TypeError(f"{self} cannot be used with isinstance()")
def __subclasscheck__(self, cls):
raise TypeError(f"{self} cannot be used with issubclass()")
@typing._tp_cache
def __getitem__(self, parameters):
return self._getitem(self, parameters)
if hasattr(typing, "LiteralString"):
    LiteralString = typing.LiteralString
elif sys.version_info[:2] >= (3, 7):
    @_SpecialForm
    def LiteralString(self, params):
        """Represents an arbitrary literal string.

        Example::

            from metaflow._vendor.v3_6.typing_extensions import LiteralString

            def query(sql: LiteralString) -> ...:
                ...

            query("SELECT * FROM table")  # ok
            query(f"SELECT * FROM {input()}")  # not ok

        See PEP 675 for details.
        """
        raise TypeError("{} is not subscriptable".format(self))
else:
    class _LiteralString(typing._FinalTypingBase, _root=True):
        """Represents an arbitrary literal string.

        Example::

            from metaflow._vendor.v3_6.typing_extensions import LiteralString

            def query(sql: LiteralString) -> ...:
                ...

            query("SELECT * FROM table")  # ok
            query(f"SELECT * FROM {input()}")  # not ok

        See PEP 675 for details.
        """

        __slots__ = ()

        def __instancecheck__(self, obj):
            raise TypeError("{} cannot be used with isinstance().".format(self))

        def __subclasscheck__(self, cls):
            raise TypeError("{} cannot be used with issubclass().".format(self))

    LiteralString = _LiteralString(_root=True)
if hasattr(typing, "Self"):
    Self = typing.Self
elif sys.version_info[:2] >= (3, 7):
    @_SpecialForm
    def Self(self, params):
        """Used to spell the type of "self" in classes.

        Example::

            from typing import Self

            class ReturnsSelf:
                def parse(self, data: bytes) -> Self:
                    ...
                    return self
        """
        raise TypeError("{} is not subscriptable".format(self))
else:
    class _Self(typing._FinalTypingBase, _root=True):
        """Used to spell the type of "self" in classes.

        Example::

            from typing import Self

            class ReturnsSelf:
                def parse(self, data: bytes) -> Self:
                    ...
                    return self
        """

        __slots__ = ()

        def __instancecheck__(self, obj):
            raise TypeError("{} cannot be used with isinstance().".format(self))

        def __subclasscheck__(self, cls):
            raise TypeError("{} cannot be used with issubclass().".format(self))

    Self = _Self(_root=True)
if hasattr(typing, "Never"):
    Never = typing.Never
elif sys.version_info[:2] >= (3, 7):
    @_SpecialForm
    def Never(self, params):
        """The bottom type, a type that has no members.

        This can be used to define a function that should never be
        called, or a function that never returns::

            from metaflow._vendor.v3_6.typing_extensions import Never

            def never_call_me(arg: Never) -> None:
                pass

            def int_or_str(arg: int | str) -> None:
                never_call_me(arg)  # type checker error
                match arg:
                    case int():
                        print("It's an int")
                    case str():
                        print("It's a str")
                    case _:
                        never_call_me(arg)  # ok, arg is of type Never
        """
        raise TypeError("{} is not subscriptable".format(self))
else:
    class _Never(typing._FinalTypingBase, _root=True):
        """The bottom type, a type that has no members.

        This can be used to define a function that should never be
        called, or a function that never returns::

            from metaflow._vendor.v3_6.typing_extensions import Never

            def never_call_me(arg: Never) -> None:
                pass

            def int_or_str(arg: int | str) -> None:
                never_call_me(arg)  # type checker error
                match arg:
                    case int():
                        print("It's an int")
                    case str():
                        print("It's a str")
                    case _:
                        never_call_me(arg)  # ok, arg is of type Never
        """

        __slots__ = ()

        def __instancecheck__(self, obj):
            raise TypeError("{} cannot be used with isinstance().".format(self))

        def __subclasscheck__(self, cls):
            raise TypeError("{} cannot be used with issubclass().".format(self))

    Never = _Never(_root=True)
if hasattr(typing, 'Required'):
    Required = typing.Required
    NotRequired = typing.NotRequired
elif sys.version_info[:2] >= (3, 9):
    class _ExtensionsSpecialForm(typing._SpecialForm, _root=True):
        def __repr__(self):
            return 'typing_extensions.{}'.format(self._name)

    @_ExtensionsSpecialForm
    def Required(self, parameters):
        """A special typing construct to mark a key of a total=False
        TypedDict as required. For example:

            class Movie(TypedDict, total=False):
                title: Required[str]
                year: int

            m = Movie(
                title='The Matrix',  # typechecker error if key is omitted
                year=1999,
            )

        There is no runtime checking that a required key is actually
        provided when instantiating a related TypedDict.
        """
        checked = typing._type_check(parameters, f'{self._name} accepts only single type')
        return typing._GenericAlias(self, (checked,))

    @_ExtensionsSpecialForm
    def NotRequired(self, parameters):
        """A special typing construct to mark a key of a TypedDict as
        potentially missing. For example:

            class Movie(TypedDict):
                title: str
                year: NotRequired[int]

            m = Movie(
                title='The Matrix',  # typechecker error if key is omitted
                year=1999,
            )
        """
        checked = typing._type_check(parameters, f'{self._name} accepts only single type')
        return typing._GenericAlias(self, (checked,))
elif sys.version_info[:2] >= (3, 7):
    class _RequiredForm(typing._SpecialForm, _root=True):
        def __repr__(self):
            return 'typing_extensions.{}'.format(self._name)

        def __getitem__(self, parameters):
            message = '{} accepts only single type'.format(self._name)
            checked = typing._type_check(parameters, message)
            return typing._GenericAlias(self, (checked,))

    # The doc texts are runtime values; their continuation lines are kept
    # flush-left so the strings stay exactly as they were.
    Required = _RequiredForm(
        'Required',
        doc="""A special typing construct to mark a key of a total=False TypedDict
as required. For example:
class Movie(TypedDict, total=False):
title: Required[str]
year: int
m = Movie(
title='The Matrix', # typechecker error if key is omitted
year=1999,
)
There is no runtime checking that a required key is actually provided
when instantiating a related TypedDict.
""")
    NotRequired = _RequiredForm(
        'NotRequired',
        doc="""A special typing construct to mark a key of a TypedDict as
potentially missing. For example:
class Movie(TypedDict):
title: str
year: NotRequired[int]
m = Movie(
title='The Matrix', # typechecker error if key is omitted
year=1999,
)
""")
else:
    # NOTE: Modeled after _Final's implementation when _FinalTypingBase available
    class _MaybeRequired(typing._FinalTypingBase, _root=True):
        __slots__ = ('__type__',)

        def __init__(self, tp=None, **kwds):
            self.__type__ = tp

        def __getitem__(self, item):
            cls = type(self)
            # An already-subscripted form cannot be subscripted again.
            if self.__type__ is not None:
                raise TypeError('{} cannot be further subscripted'
                                .format(cls.__name__[1:]))
            checked = typing._type_check(
                item, '{} accepts only single type.'.format(cls.__name__[1:]))
            return cls(checked, _root=True)

        def _eval_type(self, globalns, localns):
            resolved = typing._eval_type(self.__type__, globalns, localns)
            if resolved == self.__type__:
                return self
            return type(self)(resolved, _root=True)

        def __repr__(self):
            text = super().__repr__()
            if self.__type__ is None:
                return text
            return text + '[{}]'.format(typing._type_repr(self.__type__))

        def __hash__(self):
            return hash((type(self).__name__, self.__type__))

        def __eq__(self, other):
            if not isinstance(other, type(self)):
                return NotImplemented
            if self.__type__ is None:
                return self is other
            return self.__type__ == other.__type__

    class _Required(_MaybeRequired, _root=True):
        """A special typing construct to mark a key of a total=False
        TypedDict as required. For example:

            class Movie(TypedDict, total=False):
                title: Required[str]
                year: int

            m = Movie(
                title='The Matrix',  # typechecker error if key is omitted
                year=1999,
            )

        There is no runtime checking that a required key is actually
        provided when instantiating a related TypedDict.
        """

    class _NotRequired(_MaybeRequired, _root=True):
        """A special typing construct to mark a key of a TypedDict as
        potentially missing. For example:

            class Movie(TypedDict):
                title: str
                year: NotRequired[int]

            m = Movie(
                title='The Matrix',  # typechecker error if key is omitted
                year=1999,
            )
        """

    Required = _Required(_root=True)
    NotRequired = _NotRequired(_root=True)
if sys.version_info[:2] >= (3, 9):
class _UnpackSpecialForm(typing._SpecialForm, _root=True):
def __repr__(self):
return 'typing_extensions.' + self._name
class _UnpackAlias(typing._GenericAlias, _root=True):
__class__ = typing.TypeVar
@_UnpackSpecialForm
def Unpack(self, parameters):
"""A special typing construct to unpack a variadic type. For example:
Shape = TypeVarTuple('Shape')
Batch = NewType('Batch', int)
def add_batch_axis(
x: Array[Unpack[Shape]]
) -> Array[Batch, Unpack[Shape]]: ...
"""
item = typing._type_check(parameters, f'{self._name} accepts only single type')
return _UnpackAlias(self, (item,))
def _is_unpack(obj):
return isinstance(obj, _UnpackAlias)
elif sys.version_info[:2] >= (3, 7):
class _UnpackAlias(typing._GenericAlias, _root=True):
__class__ = typing.TypeVar
class _UnpackForm(typing._SpecialForm, _root=True):
def __repr__(self):
return 'typing_extensions.' + self._name
def __getitem__(self, parameters):
item = typing._type_check(parameters,
f'{self._name} accepts only single type')
return _UnpackAlias(self, (item,))
Unpack = _UnpackForm(
'Unpack',
doc="""A special typing construct to unpack a variadic type. For example:
Shape = TypeVarTuple('Shape')
Batch = NewType('Batch', int)
def add_batch_axis(
x: Array[Unpack[Shape]]
) -> Array[Batch, Unpack[Shape]]: ...
""")
def _is_unpack(obj):
return isinstance(obj, _UnpackAlias)
else:
# NOTE: Modeled after _Final's implementation when _FinalTypingBase available
class _Unpack(typing._FinalTypingBase, _root=True):
"""A special typing construct to unpack a variadic type. For example:
Shape = TypeVarTuple('Shape')
Batch = NewType('Batch', int)
def add_batch_axis(
x: Array[Unpack[Shape]]
) -> Array[Batch, Unpack[Shape]]: ...
"""
__slots__ = ('__type__',)
__class__ = typing.TypeVar
def __init__(self, tp=None, **kwds):
self.__type__ = tp
def __getitem__(self, item):
cls = type(self)
if self.__type__ is None:
return cls(typing._type_check(item,
'Unpack accepts only single type.'),
_root=True)
raise TypeError('Unpack cannot be further subscripted')
def _eval_type(self, globalns, localns):
new_tp = typing._eval_type(self.__type__, globalns, localns)
if new_tp == self.__type__:
return self
return type(self)(new_tp, _root=True)
def __repr__(self):
r = super().__repr__()
if self.__type__ is not None:
r += '[{}]'.format(typing._type_repr(self.__type__))
return r
def __hash__(self):
return hash((type(self).__name__, self.__type__))
def __eq__(self, other):
if not isinstance(other, _Unpack):
return NotImplemented
if self.__type__ is not None:
return self.__type__ == other.__type__
return self is other
# For 3.6 only
def _get_type_vars(self, tvars):
self.__type__._get_type_vars(tvars)
Unpack = _Unpack(_root=True)
def _is_unpack(obj):
return isinstance(obj, _Unpack)
class TypeVarTuple:
    """Type variable tuple.

    Usage::

        Ts = TypeVarTuple('Ts')

    In the same way that a normal type variable is a stand-in for a single
    type such as ``int``, a type variable *tuple* is a stand-in for a
    *tuple* type such as ``Tuple[int, str]``.

    Type variable tuples can be used in ``Generic`` declarations.
    Consider the following example::

        class Array(Generic[*Ts]): ...

    The ``Ts`` type variable tuple here behaves like ``tuple[T1, T2]``,
    where ``T1`` and ``T2`` are type variables.  To use these type variables
    as type parameters of ``Array``, we must *unpack* the type variable
    tuple using the star operator: ``*Ts``.  The signature of ``Array`` then
    behaves as if we had simply written
    ``class Array(Generic[T1, T2]): ...``.
    In contrast to ``Generic[T1, T2]``, however, ``Generic[*Ts]`` allows
    us to parameterise the class with an *arbitrary* number of type
    parameters.

    Type variable tuples can be used anywhere a normal ``TypeVar`` can.
    This includes class definitions, as shown above, as well as function
    signatures and variable annotations::

        class Array(Generic[*Ts]):

            def __init__(self, shape: Tuple[*Ts]):
                self._shape: Tuple[*Ts] = shape

            def get_shape(self) -> Tuple[*Ts]:
                return self._shape

        shape = (Height(480), Width(640))
        x: Array[Height, Width] = Array(shape)
        y = abs(x)  # Inferred type is Array[Height, Width]
        z = x + x   #        ... is Array[Height, Width]
        x.get_shape()  #     ... is tuple[Height, Width]
    """

    # Trick Generic __parameters__.
    __class__ = typing.TypeVar

    def __iter__(self):
        # Iterating (e.g. ``*Ts``) produces the single Unpack[self] item.
        yield self.__unpacked__

    def __init__(self, name):
        self.__name__ = name

        # for pickling: remember the module of the code that created us.
        # The frame lookup must stay inline so that depth 1 is the caller.
        try:
            caller_module = sys._getframe(1).f_globals.get('__name__', '__main__')
        except (AttributeError, ValueError):
            caller_module = None
        if caller_module != 'typing_extensions':
            self.__module__ = caller_module

        self.__unpacked__ = Unpack[self]

    def __repr__(self):
        return self.__name__

    def __hash__(self):
        return object.__hash__(self)

    def __eq__(self, other):
        return self is other

    def __reduce__(self):
        return self.__name__

    def __init_subclass__(self, *args, **kwds):
        if '_root' not in kwds:
            raise TypeError("Cannot subclass special typing classes")

    if not PEP_560:
        # Only needed in 3.6.
        def _get_type_vars(self, tvars):
            if self not in tvars:
                tvars.append(self)
try:
    reveal_type = typing.reveal_type
except AttributeError:
    def reveal_type(__obj: T) -> T:
        """Reveal the inferred type of a variable.

        A static type checker that sees a call to ``reveal_type()`` emits
        the inferred type of the argument::

            x: int = 1
            reveal_type(x)

        Running a static type checker (e.g., ``mypy``) on this example
        will produce output similar to 'Revealed type is "builtins.int"'.

        At runtime the name of the argument's runtime type is printed to
        ``sys.stderr`` and the argument is returned unchanged.
        """
        runtime_name = type(__obj).__name__
        print(f"Runtime type is {runtime_name!r}", file=sys.stderr)
        return __obj
try:
    assert_never = typing.assert_never
except AttributeError:
    def assert_never(__arg: Never) -> Never:
        """Assert to the type checker that a line of code is unreachable.

        Example::

            def int_or_str(arg: int | str) -> None:
                match arg:
                    case int():
                        print("It's an int")
                    case str():
                        print("It's a str")
                    case _:
                        assert_never(arg)

        A type checker that finds a reachable call to ``assert_never()``
        reports an error.

        At runtime, calling this function always raises ``AssertionError``.
        """
        message = "Expected code to be unreachable"
        raise AssertionError(message)
try:
    dataclass_transform = typing.dataclass_transform
except AttributeError:
    def dataclass_transform(
        *,
        eq_default: bool = True,
        order_default: bool = False,
        kw_only_default: bool = False,
        field_descriptors: typing.Tuple[
            typing.Union[typing.Type[typing.Any], typing.Callable[..., typing.Any]],
            ...
        ] = (),
    ) -> typing.Callable[[T], T]:
        """Decorator that marks a function, class, or metaclass as providing
        dataclass-like behavior.

        Example:

            from metaflow._vendor.v3_6.typing_extensions import dataclass_transform

            _T = TypeVar("_T")

            # Used on a decorator function
            @dataclass_transform()
            def create_model(cls: type[_T]) -> type[_T]:
                ...
                return cls

            @create_model
            class CustomerModel:
                id: int
                name: str

            # Used on a base class
            @dataclass_transform()
            class ModelBase: ...

            class CustomerModel(ModelBase):
                id: int
                name: str

            # Used on a metaclass
            @dataclass_transform()
            class ModelMeta(type): ...

            class ModelBase(metaclass=ModelMeta): ...

            class CustomerModel(ModelBase):
                id: int
                name: str

        Each ``CustomerModel`` class in the example behaves similarly to a
        dataclass created with ``@dataclasses.dataclass``; for instance the
        type checker will synthesize an ``__init__`` method.

        Keyword arguments customising that behavior:

        - ``eq_default``: value assumed for the ``eq`` parameter when the
          caller omits it.
        - ``order_default``: value assumed for the ``order`` parameter when
          the caller omits it.
        - ``kw_only_default``: value assumed for the ``kw_only`` parameter
          when the caller omits it.
        - ``field_descriptors``: a static list of supported classes or
          functions that describe fields, similar to ``dataclasses.field()``.

        At runtime the decorator stores these arguments on the decorated
        object as the ``__dataclass_transform__`` attribute and returns the
        object itself.

        See PEP 681 for details.
        """

        def decorator(cls_or_fn):
            # A fresh dict per decorated object, so records are never shared.
            cls_or_fn.__dataclass_transform__ = dict(
                eq_default=eq_default,
                order_default=order_default,
                kw_only_default=kw_only_default,
                field_descriptors=field_descriptors,
            )
            return cls_or_fn

        return decorator
# Unpack/TypeVarTuple has a dual nature, which requires monkey patching:
# - Unpack must look like a kind of TypeVar so that Generic[Unpack[Ts]]
#   is accepted.
# - It must *not* count as a TypeVar when generic parameters are counted,
#   so that subscripting a generic does not make the runtime substitute
#   the Unpack with the subscripted type.
if not hasattr(typing, "TypeVarTuple"):
    setattr(typing, "_collect_type_vars", _collect_type_vars)
    setattr(typing, "_check_generic", _check_generic)
================================================
FILE: metaflow/_vendor/v3_6/zipp.LICENSE
================================================
Copyright Jason R. Coombs
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
================================================
FILE: metaflow/_vendor/v3_6/zipp.py
================================================
import io
import posixpath
import zipfile
import itertools
import contextlib
import sys
import pathlib
if sys.version_info < (3, 7):
from collections import OrderedDict
else:
OrderedDict = dict
__all__ = ['Path']
def _parents(path):
    """
    Generate every parent of ``path``, whose elements are separated by
    posixpath.sep. The path itself is not included.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # The first item of the ancestry is the path itself; drop it.
    return itertools.islice(lineage, 1, None)
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
>>> list(_ancestry('b/d'))
['b/d', 'b']
>>> list(_ancestry('/b/d/'))
['/b/d', '/b']
>>> list(_ancestry('b/d/f/'))
['b/d/f', 'b/d', 'b']
>>> list(_ancestry('b'))
['b']
>>> list(_ancestry(''))
[]
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
yield path
path, tail = posixpath.split(path)
_dedupe = OrderedDict.fromkeys
"""Deduplicate an iterable in original order"""
def _difference(minuend, subtrahend):
"""
Return items in minuend not in subtrahend, retaining order
with O(1) lookup.
"""
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
class CompleteDirs(zipfile.ZipFile):
    """
    ZipFile subclass whose namelist always contains the directories that
    are merely implied by the stored entries.
    """

    @staticmethod
    def _implied_dirs(names):
        # Every ancestor of every entry, written as a directory entry ...
        ancestors = itertools.chain.from_iterable(map(_parents, names))
        dir_entries = (ancestor + posixpath.sep for ancestor in ancestors)
        # ... minus those already listed, de-duplicated in first-seen order.
        missing = _difference(dir_entries, names)
        return _dedupe(missing)

    def namelist(self):
        listed = super().namelist()
        implied = list(self._implied_dirs(listed))
        return listed + implied

    def _name_set(self):
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        Return ``name`` with a trailing slash when only the slash-terminated
        form is a known entry; otherwise return ``name`` unchanged.
        """
        known = self._name_set()
        as_dir = name + '/'
        if name not in known and as_dir in known:
            return as_dir
        return name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an appropriate
        CompleteDirs subclass. An existing ZipFile is converted in place
        by reassigning its ``__class__``.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, zipfile.ZipFile):
            return cls(_pathlib_compat(source))

        # Only allow for FastLookup when supplied zipfile is read-only
        target_cls = cls if 'r' in source.mode else CompleteDirs
        source.__class__ = target_cls
        return source
class FastLookup(CompleteDirs):
    """
    CompleteDirs variant that computes the name list and the name set once
    and serves later calls from private attributes.
    """

    def namelist(self):
        try:
            return self.__names
        except AttributeError:
            pass
        # First call: compute outside the handler so that an AttributeError
        # raised by the parent implementation is not masked.
        self.__names = super().namelist()
        return self.__names

    def _name_set(self):
        try:
            return self.__lookup
        except AttributeError:
            pass
        self.__lookup = super()._name_set()
        return self.__lookup
def _pathlib_compat(path):
"""
For path-like objects, convert to a filename for compatibility
on Python 3.6.1 and earlier.
"""
try:
return path.__fspath__()
except AttributeError:
return str(path)
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = zipfile.ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'mem/abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('mem/abcde.zip', 'a.txt')
    >>> b
    Path('mem/abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('mem/abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> import os
    >>> str(c).replace(os.sep, posixpath.sep)
    'mem/abcde.zip/b/c.txt'

    At the root, ``name``, ``filename``, and ``parent``
    resolve to the zipfile. Note these attributes are not
    valid and will raise a ``ValueError`` if the zipfile
    has no filename.

    >>> root.name
    'abcde.zip'
    >>> str(root.filename).replace(os.sep, posixpath.sep)
    'mem/abcde.zip'
    >>> str(root.parent)
    'mem'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Construct a Path from a ZipFile or filename.

        Note: an existing ZipFile object passed as ``root`` has its type
        (``__class__``) mutated to a specialized type. A caller that wants
        to keep the original type should create a separate ZipFile object
        or pass a filename instead.
        """
        self.root = FastLookup.make(root)
        self.at = at

    def open(self, mode='r', *args, pwd=None, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()``; extra arguments are passed through
        to io.TextIOWrapper().
        """
        if self.is_dir():
            raise IsADirectoryError(self)
        zip_mode = mode[0]
        if not self.exists() and zip_mode == 'r':
            raise FileNotFoundError(self)
        raw = self.root.open(self.at, zip_mode, pwd=pwd)
        if 'b' not in mode:
            return io.TextIOWrapper(raw, *args, **kwargs)
        if args or kwargs:
            raise ValueError("encoding args invalid for binary operation")
        return raw

    # For each of the following properties the value derived from ``at``
    # wins; when it is empty the value is taken from ``filename`` instead.

    @property
    def name(self):
        own = pathlib.Path(self.at).name
        return own or self.filename.name

    @property
    def suffix(self):
        own = pathlib.Path(self.at).suffix
        return own or self.filename.suffix

    @property
    def suffixes(self):
        own = pathlib.Path(self.at).suffixes
        return own or self.filename.suffixes

    @property
    def stem(self):
        own = pathlib.Path(self.at).stem
        return own or self.filename.stem

    @property
    def filename(self):
        archive = pathlib.Path(self.root.filename)
        return archive.joinpath(self.at)

    def read_text(self, *args, **kwargs):
        with self.open('r', *args, **kwargs) as stream:
            return stream.read()

    def read_bytes(self):
        with self.open('rb') as stream:
            return stream.read()

    def _is_child(self, path):
        # ``path`` is a direct child when its parent directory is this entry.
        parent_of_path = posixpath.dirname(path.at.rstrip("/"))
        return parent_of_path == self.at.rstrip("/")

    def _next(self, at):
        return self.__class__(self.root, at)

    def is_dir(self):
        # The archive root (empty ``at``) counts as a directory.
        return not self.at or self.at.endswith("/")

    def is_file(self):
        return self.exists() and not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        candidates = map(self._next, self.root.namelist())
        return filter(self._is_child, candidates)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, *other):
        joined = posixpath.join(self.at, *map(_pathlib_compat, other))
        resolved = self.root.resolve_dir(joined)
        return self._next(resolved)

    __truediv__ = joinpath

    @property
    def parent(self):
        if not self.at:
            return self.filename.parent
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
================================================
FILE: metaflow/_vendor/v3_7/__init__.py
================================================
# Empty file
================================================
FILE: metaflow/_vendor/v3_7/importlib_metadata/__init__.py
================================================
import os
import re
import abc
import csv
import sys
from metaflow._vendor.v3_7 import zipp
import email
import pathlib
import operator
import textwrap
import warnings
import functools
import itertools
import posixpath
import collections
from . import _adapters, _meta
from ._collections import FreezableDefaultDict, Pair
from ._compat import (
NullFinder,
install,
pypy_partial,
)
from ._functools import method_cache, pass_none
from ._itertools import always_iterable, unique_everseen
from ._meta import PackageMetadata, SimplePath
from contextlib import suppress
from importlib import import_module
from importlib.abc import MetaPathFinder
from itertools import starmap
from typing import List, Mapping, Optional, Union
__all__ = [
'Distribution',
'DistributionFinder',
'PackageMetadata',
'PackageNotFoundError',
'distribution',
'distributions',
'entry_points',
'files',
'metadata',
'packages_distributions',
'requires',
'version',
]
class PackageNotFoundError(ModuleNotFoundError):
    """The package was not found."""

    def __str__(self):
        return "No package metadata was found for {}".format(self.name)

    @property
    def name(self):
        # The exception is expected to carry exactly one argument: the name.
        [package_name] = self.args
        return package_name
class Sectioned:
    """
    A simple entry point config parser for performance

    >>> for item in Sectioned.read(Sectioned._sample):
    ...     print(item)
    Pair(name='sec1', value='# comments ignored')
    Pair(name='sec1', value='a = 1')
    Pair(name='sec1', value='b = 2')
    Pair(name='sec2', value='a = 2')

    >>> res = Sectioned.section_pairs(Sectioned._sample)
    >>> item = next(res)
    >>> item.name
    'sec1'
    >>> item.value
    Pair(name='a', value='1')
    >>> item = next(res)
    >>> item.value
    Pair(name='b', value='2')
    >>> item = next(res)
    >>> item.name
    'sec2'
    >>> item.value
    Pair(name='a', value='2')
    >>> list(res)
    []
    """

    _sample = textwrap.dedent(
        """
        [sec1]
        # comments ignored
        a = 1
        b = 2
        [sec2]
        a = 2
        """
    ).lstrip()

    @classmethod
    def section_pairs(cls, text):
        """Yield each valid line as a Pair of section name and parsed Pair.

        Lines that appear before any ``[section]`` header are dropped.
        """
        return (
            entry._replace(value=Pair.parse(entry.value))
            for entry in cls.read(text, filter_=cls.valid)
            if entry.name is not None
        )

    @staticmethod
    def read(text, filter_=None):
        """Yield ``Pair(section, line)`` for every kept, stripped line.

        ``filter_`` decides which stripped lines are kept; the default
        ``None`` keeps every non-empty line.
        """
        stripped = map(str.strip, text.splitlines())
        current_section = None
        for line in filter(filter_, stripped):
            is_header = line.startswith('[') and line.endswith(']')
            if is_header:
                current_section = line.strip('[]')
            else:
                yield Pair(current_section, line)

    @staticmethod
    def valid(line):
        """Truthy for non-empty lines that are not ``#`` comments."""
        if not line:
            return line
        return not line.startswith('#')
class DeprecatedTuple:
    """
    Provide subscript item access for backward compatibility.

    >>> recwarn = getfixture('recwarn')
    >>> ep = EntryPoint(name='name', value='value', group='group')
    >>> ep[:]
    ('name', 'value', 'group')
    >>> ep[0]
    'name'
    >>> len(recwarn)
    1
    """

    # Pre-bound deprecation warning; called directly from __getitem__.
    _warn = functools.partial(
        warnings.warn,
        "EntryPoint tuple interface is deprecated. Access members by name.",
        DeprecationWarning,
        stacklevel=pypy_partial(2),
    )

    def __getitem__(self, item):
        self._warn()
        as_tuple = self._key()
        return as_tuple[item]
class EntryPoint(DeprecatedTuple):
    """An entry point as defined by Python packaging conventions.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.
    """

    # A regular expression describing the syntax for an entry point,
    # which might look like:
    #
    #     - module
    #     - package.module
    #     - package.module:attribute
    #     - package.module:object.attribute
    #     - package.module:attr [extra1, extra2]
    #
    # Other combinations are possible as well.
    #
    # The expression is lenient about whitespace around the ':',
    # following the attr, and following any extras.
    #
    # The named groups are required: the accessors below look them up by
    # name ('module', 'attr', 'extras').
    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+))?\s*'
        r'(?P<extras>\[.*\])?\s*$'
    )

    dist: Optional['Distribution'] = None

    def __init__(self, name, value, group):
        # Written through vars() because __setattr__ always raises.
        vars(self).update(name=name, value=value, group=group)

    def load(self):
        """Load the entry point from its definition. If only a module
        is indicated by the value, return that module. Otherwise,
        return the named object.
        """
        match = self.pattern.match(self.value)
        module = import_module(match.group('module'))
        attrs = filter(None, (match.group('attr') or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def module(self):
        """The module part of the value."""
        match = self.pattern.match(self.value)
        return match.group('module')

    @property
    def attr(self):
        """The attribute part of the value, or None when absent."""
        match = self.pattern.match(self.value)
        return match.group('attr')

    @property
    def extras(self):
        """The extras named in the value, as a list of strings."""
        match = self.pattern.match(self.value)
        # findall yields the matched words themselves; finditer would hand
        # back re.Match objects rather than the extras names.
        return re.findall(r'\w+', match.group('extras') or '')

    def _for(self, dist):
        """Attach ``dist`` to this entry point and return self."""
        vars(self).update(dist=dist)
        return self

    def __iter__(self):
        """
        Supply iter so one may construct dicts of EntryPoints by name.
        """
        msg = (
            "Construction of dict of EntryPoints is deprecated in "
            "favor of EntryPoints."
        )
        warnings.warn(msg, DeprecationWarning)
        return iter((self.name, self))

    def matches(self, **params):
        """Return True when every keyword equals the same-named attribute."""
        attrs = (getattr(self, param) for param in params)
        return all(map(operator.eq, params.values(), attrs))

    def _key(self):
        return self.name, self.value, self.group

    def __lt__(self, other):
        return self._key() < other._key()

    def __eq__(self, other):
        return self._key() == other._key()

    def __setattr__(self, name, value):
        raise AttributeError("EntryPoint objects are immutable.")

    def __repr__(self):
        return (
            f'EntryPoint(name={self.name!r}, value={self.value!r}, '
            f'group={self.group!r})'
        )

    def __hash__(self):
        return hash(self._key())
class DeprecatedList(list):
    """
    Allow an otherwise immutable object to implement mutability
    for compatibility.

    >>> recwarn = getfixture('recwarn')
    >>> dl = DeprecatedList(range(3))
    >>> dl[0] = 1
    >>> dl.append(3)
    >>> del dl[3]
    >>> dl.reverse()
    >>> dl.sort()
    >>> dl.extend([4])
    >>> dl.pop(-1)
    4
    >>> dl.remove(1)
    >>> dl += [5]
    >>> dl + [6]
    [1, 2, 5, 6]
    >>> dl + (6,)
    [1, 2, 5, 6]
    >>> dl.insert(0, 0)
    >>> dl
    [0, 1, 2, 5]
    >>> dl == [0, 1, 2, 5]
    True
    >>> dl == (0, 1, 2, 5)
    True
    >>> len(recwarn)
    1
    """

    _warn = functools.partial(
        warnings.warn,
        "EntryPoints list interface is deprecated. Cast to list if needed.",
        DeprecationWarning,
        stacklevel=pypy_partial(2),
    )

    def _wrap_deprecated_method(method_name: str):  # type: ignore
        # Build a method that warns, then defers to the inherited method of
        # the same name.
        def wrapped(self, *args, **kwargs):
            self._warn()
            inherited = getattr(super(), method_name)
            return inherited(*args, **kwargs)

        return wrapped

    # Install a warning wrapper for each mutating list method.
    for method_name in [
        '__setitem__',
        '__delitem__',
        'append',
        'reverse',
        'extend',
        'pop',
        'remove',
        '__iadd__',
        'insert',
        'sort',
    ]:
        locals()[method_name] = _wrap_deprecated_method(method_name)

    def __add__(self, other):
        # Tuples are accepted silently; anything else warns and is coerced.
        if not isinstance(other, tuple):
            self._warn()
            other = tuple(other)
        combined = tuple(self) + other
        return self.__class__(combined)

    def __eq__(self, other):
        if not isinstance(other, tuple):
            self._warn()
            other = tuple(other)
        own_items = tuple(self)
        return own_items.__eq__(other)
class EntryPoints(DeprecatedList):
    """
    An immutable collection of selectable EntryPoint objects.
    """

    __slots__ = ()

    def __getitem__(self, name):  # -> EntryPoint:
        """
        Get the EntryPoint in self matching name.

        Integer indexes still work but emit a DeprecationWarning. A name
        without a match raises KeyError.
        """
        if isinstance(name, int):
            warnings.warn(
                "Accessing entry points by index is deprecated. "
                "Cast to tuple if needed.",
                DeprecationWarning,
                stacklevel=2,
            )
            return super().__getitem__(name)
        try:
            matching = self.select(name=name)
            return next(iter(matching))
        except StopIteration:
            raise KeyError(name)

    def select(self, **params):
        """
        Select entry points from self that match the
        given parameters (typically group and/or name).
        """
        chosen = (entry for entry in self if entry.matches(**params))
        return EntryPoints(chosen)

    @property
    def names(self):
        """
        Return the set of all names of all entry points.
        """
        return set(entry.name for entry in self)

    @property
    def groups(self):
        """
        Return the set of all groups of all entry points.

        For coverage while SelectableGroups is present.
        >>> EntryPoints().groups
        set()
        """
        return set(entry.group for entry in self)

    @classmethod
    def _from_text_for(cls, text, dist):
        parsed = cls._from_text(text)
        return cls(entry._for(dist) for entry in parsed)

    @staticmethod
    def _from_text(text):
        # ``text`` may be None; treat that as empty.
        sections = Sectioned.section_pairs(text or '')
        return (
            EntryPoint(name=pair.value.name, value=pair.value.value, group=pair.name)
            for pair in sections
        )
class Deprecated:
    """
    Compatibility add-in for mapping to indicate that
    mapping behavior is deprecated.

    >>> recwarn = getfixture('recwarn')
    >>> class DeprecatedDict(Deprecated, dict): pass
    >>> dd = DeprecatedDict(foo='bar')
    >>> dd.get('baz', None)
    >>> dd['foo']
    'bar'
    >>> list(dd)
    ['foo']
    >>> list(dd.keys())
    ['foo']
    >>> 'foo' in dd
    True
    >>> list(dd.values())
    ['bar']
    >>> len(recwarn)
    1
    """

    _warn = functools.partial(
        warnings.warn,
        "SelectableGroups dict interface is deprecated. Use select.",
        DeprecationWarning,
        stacklevel=pypy_partial(2),
    )

    # Each mapping accessor below warns first and then delegates to the
    # next class in the MRO. The warning call is kept inline in every
    # method rather than shared through a helper, so the configured
    # stacklevel keeps referring to the same frame.

    def __getitem__(self, name):
        self._warn()
        value = super().__getitem__(name)
        return value

    def get(self, name, default=None):
        self._warn()
        value = super().get(name, default)
        return value

    def __iter__(self):
        self._warn()
        iterator = super().__iter__()
        return iterator

    def __contains__(self, *args):
        self._warn()
        present = super().__contains__(*args)
        return present

    def keys(self):
        self._warn()
        view = super().keys()
        return view

    def values(self):
        self._warn()
        view = super().values()
        return view
class SelectableGroups(Deprecated, dict):
    """
    A backward- and forward-compatible result from
    entry_points that fully implements the dict interface.
    """

    @classmethod
    def load(cls, eps):
        """Build the mapping of group name to EntryPoints from ``eps``."""
        group_of = operator.attrgetter('group')
        # groupby only merges adjacent items, hence the sort by group first.
        by_group = itertools.groupby(sorted(eps, key=group_of), group_of)
        return cls((group, EntryPoints(members)) for group, members in by_group)

    @property
    def _all(self):
        """
        Reconstruct a list of all entrypoints from the groups.
        """
        # super(Deprecated, self) skips Deprecated.values, so no warning.
        per_group = super(Deprecated, self).values()
        return EntryPoints(itertools.chain.from_iterable(per_group))

    @property
    def groups(self):
        return self._all.groups

    @property
    def names(self):
        """
        for coverage:
        >>> SelectableGroups().names
        set()
        """
        return self._all.names

    def select(self, **params):
        """Return self when no parameters are given, else the matches."""
        return self._all.select(**params) if params else self
class PackagePath(pathlib.PurePosixPath):
    """A reference to a path in a package"""

    def read_text(self, encoding='utf-8'):
        """Return the located file's contents, read with ``encoding``."""
        target = self.locate()
        with target.open(encoding=encoding) as handle:
            contents = handle.read()
        return contents

    def read_binary(self):
        """Return the located file's contents, opened in ``'rb'`` mode."""
        target = self.locate()
        with target.open('rb') as handle:
            contents = handle.read()
        return contents

    def locate(self):
        """Return a path-like object for this path"""
        owner = self.dist
        return owner.locate_file(self)
class FileHash:
    """Hash of a file, parsed from a ``mode=value`` spec string."""

    def __init__(self, spec):
        # partition() leaves ``value`` empty when ``spec`` contains no '='.
        self.mode, _, self.value = spec.partition('=')

    def __repr__(self):
        # The repr must show both parts instead of being an empty string.
        return f'<FileHash mode: {self.mode} value: {self.value}>'
class Distribution:
    """A Python distribution package."""

    @abc.abstractmethod
    def read_text(self, filename):
        """Attempt to load metadata file given by the name.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path):
        """
        Given a path to a file in this distribution, return a path
        to it.
        """

    @classmethod
    def from_name(cls, name):
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        """
        for resolver in cls._discover_resolvers():
            candidates = resolver(DistributionFinder.Context(name=name))
            first = next(iter(candidates), None)
            if first is not None:
                return first
        # No resolver produced a distribution.
        raise PackageNotFoundError(name)

    @classmethod
    def discover(cls, **kwargs):
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for all packages.
        """
        context = kwargs.pop('context', None)
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        if not context:
            context = DistributionFinder.Context(**kwargs)
        per_resolver = map(
            lambda resolver: resolver(context), cls._discover_resolvers()
        )
        return itertools.chain.from_iterable(per_resolver)

    @staticmethod
    def at(path):
        """Return a Distribution for the indicated metadata path

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        metadata_dir = pathlib.Path(path)
        return PathDistribution(metadata_dir)

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers."""
        declared = map(
            lambda finder: getattr(finder, 'find_distributions', None),
            sys.meta_path,
        )
        # Drop finders that do not provide find_distributions.
        return filter(None, declared)

    @classmethod
    def _local(cls, root='.'):
        from pep517 import build, meta

        system = build.compat_system(root)
        builder = functools.partial(
            meta.build,
            source_dir=root,
            system=system,
        )
        built = meta.build_as_zip(builder)
        return PathDistribution(zipp.Path(built))

    @property
    def metadata(self) -> _meta.PackageMetadata:
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata.  See PEP 566 for details.
        """
        # Candidates are tried in order and the first truthy text wins.
        # The final '' is here to support old egg-info files: its effect
        # is to just end up using the PathDistribution's self._path
        # (which points to the egg-info file) attribute unchanged.
        text = None
        for candidate in ('METADATA', 'PKG-INFO', ''):
            text = self.read_text(candidate)
            if text:
                break
        return _adapters.Message(email.message_from_string(text))

    @property
    def name(self):
        """Return the 'Name' metadata for the distribution package."""
        return self.metadata['Name']

    @property
    def _normalized_name(self):
        """Return a normalized version of the name."""
        return Prepared.normalize(self.name)

    @property
    def version(self):
        """Return the 'Version' metadata for the distribution package."""
        return self.metadata['Version']

    @property
    def entry_points(self):
        text = self.read_text('entry_points.txt')
        return EntryPoints._from_text_for(text, self)

    @property
    def files(self):
        """Files in this distribution.

        :return: List of PackagePath for this distribution or None

        Result is `None` if the metadata file that enumerates files
        (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
        missing.
        Result may be empty if the metadata exists but is empty.
        """

        def make_file(name, hash=None, size_str=None):
            entry = PackagePath(name)
            entry.hash = FileHash(hash) if hash else None
            entry.size = int(size_str) if size_str else None
            entry.dist = self
            return entry

        @pass_none
        def make_files(lines):
            rows = csv.reader(lines)
            return list(starmap(make_file, rows))

        lines = self._read_files_distinfo() or self._read_files_egginfo()
        return make_files(lines)

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo(self):
        """
        SOURCES.txt might contain literal commas, so wrap each line
        in quotes.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self):
        """Generated requirements specified for this Distribution"""
        found = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return found and list(found)

    def _read_dist_info_reqs(self):
        return self.metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        text = self.read_text('requires.txt')
        return text and self._deps_from_requires_text(text)

    @classmethod
    def _deps_from_requires_text(cls, source):
        sections = Sectioned.read(source)
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.
        """

        def make_condition(name):
            return name and f'extra == "{name}"'

        def quoted_marker(section):
            section = section or ''
            extra, sep, markers = section.partition(':')
            if extra and markers:
                markers = f'({markers})'
            conditions = [
                part for part in (markers, make_condition(extra)) if part
            ]
            if not conditions:
                return ''
            return '; ' + ' and '.join(conditions)

        def url_req_space(req):
            """
            PEP 508 requires a space between the url_spec and the quoted_marker.
            Ref python/importlib_metadata#357.
            """
            # '@' is uniquely indicative of a url_req.
            return ' ' if '@' in req else ''

        for entry in sections:
            requirement = entry.value
            yield requirement + url_req_space(requirement) + quoted_marker(entry.name)
class DistributionFinder(MetaPathFinder):
    """
    A MetaPathFinder capable of discovering installed distributions.
    """

    class Context:
        """
        Keyword arguments presented by the caller to
        ``distributions()`` or ``Distribution.discover()``
        to narrow the scope of a search for distributions
        in all DistributionFinders.

        Each DistributionFinder may expect any parameters
        and should attempt to honor the canonical
        parameters defined below when appropriate.
        """

        # Specific name for which a distribution finder should match.
        # A name of ``None`` matches all distributions.
        name = None

        def __init__(self, **kwargs):
            # Every keyword becomes an instance attribute.
            vars(self).update(kwargs)

        @property
        def path(self):
            """
            The sequence of directory path that a distribution finder
            should search.

            Typically refers to Python installed package paths such as
            "site-packages" directories and defaults to ``sys.path``.
            """
            supplied = vars(self)
            return supplied.get('path', sys.path)

    @abc.abstractmethod
    def find_distributions(self, context=Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """
class FastPath:
    """
    Micro-optimized class for searching a path for
    children.

    >>> FastPath('').children()
    ['...']
    """

    @functools.lru_cache()  # type: ignore
    def __new__(cls, root):
        return super().__new__(cls)

    def __init__(self, root):
        self.root = str(root)

    def joinpath(self, child):
        return pathlib.Path(self.root, child)

    def children(self):
        """List the root's entries: a directory listing, else zip contents,
        else an empty list. Any Exception from either attempt is ignored."""
        try:
            return os.listdir(self.root or '.')
        except Exception:
            pass
        try:
            return self.zip_children()
        except Exception:
            pass
        return []

    def zip_children(self):
        archive = zipp.Path(self.root)
        names = archive.root.namelist()
        # From now on this instance joins paths inside the archive.
        self.joinpath = archive.joinpath

        top_level = (name.split(posixpath.sep, 1)[0] for name in names)
        return dict.fromkeys(top_level)

    def search(self, name):
        lookup = self.lookup(self.mtime)
        return lookup.search(name)

    @property
    def mtime(self):
        try:
            return os.stat(self.root).st_mtime
        except OSError:
            pass
        # stat failed: clear the lookup cache; the property value is None.
        self.lookup.cache_clear()

    @method_cache
    def lookup(self, mtime):
        return Lookup(self)
class Lookup:
    """Index of the metadata entries found among the children of a FastPath."""

    def __init__(self, path: FastPath):
        def project_name(filename):
            # rpartition is faster than splitext and suitable for this purpose.
            return filename.rpartition(".")[0].partition("-")[0]

        base = os.path.basename(path.root).lower()
        base_is_egg = base.endswith(".egg")
        self.infos = FreezableDefaultDict(list)
        self.eggs = FreezableDefaultDict(list)

        for child in path.children():
            lowered = child.lower()
            if lowered.endswith((".dist-info", ".egg-info")):
                key = Prepared.normalize(project_name(lowered))
                self.infos[key].append(path.joinpath(child))
            elif base_is_egg and lowered == "egg-info":
                # For an ``*.egg`` root the name comes from the root itself.
                key = Prepared.legacy_normalize(project_name(base))
                self.eggs[key].append(path.joinpath(child))

        self.infos.freeze()
        self.eggs.freeze()

    def search(self, prepared):
        """Return the entries for ``prepared``; all entries when it is falsy."""
        if prepared:
            infos = self.infos[prepared.normalized]
            eggs = self.eggs[prepared.legacy_normalized]
        else:
            infos = itertools.chain.from_iterable(self.infos.values())
            eggs = itertools.chain.from_iterable(self.eggs.values())
        return itertools.chain(infos, eggs)
class Prepared:
    """
    A search term for distribution metadata, holding the requested name
    together with its two normalized spellings.

    When constructed with ``None`` both normalized forms stay ``None``
    and the instance is falsy.
    """

    normalized = None
    legacy_normalized = None

    def __init__(self, name):
        self.name = name
        if name is not None:
            self.normalized = self.normalize(name)
            self.legacy_normalized = self.legacy_normalize(name)

    @staticmethod
    def normalize(name):
        """
        PEP 503 normalization plus dashes as underscores.
        """
        collapsed = re.sub(r"[-_.]+", "-", name)
        return collapsed.lower().replace('-', '_')

    @staticmethod
    def legacy_normalize(name):
        """
        Normalize the package name as found in the convention in
        older packaging tools versions and specs.
        """
        lowered = name.lower()
        return lowered.replace('-', '_')

    def __bool__(self):
        return bool(self.name)
@install
class MetadataPathFinder(NullFinder, DistributionFinder):
    """A degenerate finder for distribution packages on the file system.

    This finder supplies only a find_distributions() method for versions
    of Python that do not have a PathFinder find_distributions().
    """

    def find_distributions(self, context=DistributionFinder.Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching ``context.name``
        (or all names if ``None`` indicated) along the paths in the list
        of directories ``context.path``.
        """
        metadata_dirs = self._search_paths(context.name, context.path)
        return map(PathDistribution, metadata_dirs)

    @classmethod
    def _search_paths(cls, name, paths):
        """Find metadata directories in paths heuristically."""
        prepared = Prepared(name)
        fast_paths = map(FastPath, paths)
        per_path_hits = (entry.search(prepared) for entry in fast_paths)
        return itertools.chain.from_iterable(per_path_hits)

    def invalidate_caches(cls):
        # Defined without @classmethod, so ``cls`` is bound to the instance
        # the method is called on.
        FastPath.__new__.cache_clear()
class PathDistribution(Distribution):
    """A distribution whose metadata lives in a directory-like ``SimplePath``."""

    def __init__(self, path: SimplePath):
        """Construct a distribution.

        :param path: SimplePath indicating the metadata directory.
        """
        self._path = path

    def read_text(self, filename):
        # A missing or unreadable metadata file yields None rather than an error.
        with suppress(
            FileNotFoundError,
            IsADirectoryError,
            KeyError,
            NotADirectoryError,
            PermissionError,
        ):
            return self._path.joinpath(filename).read_text(encoding='utf-8')

    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path):
        """Resolve ``path`` relative to the parent of the metadata directory."""
        return self._path.parent / path

    @property
    def _normalized_name(self):
        """
        Performance optimization: where possible, resolve the
        normalized name from the file system path.

        The name taken from the path is passed through
        ``Prepared.normalize`` so it is spelled the same way regardless
        of how the directory is named; when the path does not yield a
        name, the base class implementation is used instead.
        """
        stem = os.path.basename(str(self._path))
        name = self._name_from_stem(stem)
        normalized = Prepared.normalize(name) if name is not None else None
        return normalized or super()._normalized_name

    def _name_from_stem(self, stem):
        """
        Extract the distribution name from a metadata directory name.

        ``'foo-3.0.egg-info'`` gives ``'foo'``, ``'CherryPy-3.0.dist-info'``
        gives ``'CherryPy'`` and ``'face.egg-info'`` gives ``'face'``.
        Returns ``None`` when ``stem`` is not a ``.dist-info`` or
        ``.egg-info`` name.
        """
        filename, ext = os.path.splitext(stem)
        if ext not in ('.dist-info', '.egg-info'):
            return None
        # Partition the extension-less filename: partitioning the full stem
        # would leak part of the extension into version-less names
        # ('face.egg-info' -> 'face.egg').
        name, _sep, _rest = filename.partition('-')
        return name
def distribution(distribution_name):
    """Look up the ``Distribution`` for a package by name.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    found = Distribution.from_name(distribution_name)
    return found
def distributions(**kwargs):
    """Discover every ``Distribution`` in the current environment.

    Keyword arguments are forwarded unchanged to ``Distribution.discover``.

    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
def metadata(distribution_name) -> _meta.PackageMetadata:
    """Return the parsed metadata of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: A PackageMetadata containing the parsed metadata.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
def version(distribution_name):
    """Return the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
def entry_points(**params) -> Union[EntryPoints, SelectableGroups]:
    """Return EntryPoint objects for all installed packages.

    Pass selection parameters (group or name) to filter the
    result to entry points matching those properties (see
    EntryPoints.select()).

    For compatibility, returns ``SelectableGroups`` object unless
    selection parameters are supplied. In the future, this function
    will return ``EntryPoints`` instead of ``SelectableGroups``
    even when no selection parameters are supplied.

    For maximum future compatibility, pass selection parameters
    or invoke ``.select`` with parameters on the result.

    :return: EntryPoints or SelectableGroups for all installed packages.
    """

    def first_per_name(dists):
        # Keep only the first distribution seen for each normalized name.
        return unique_everseen(dists, key=operator.attrgetter('_normalized_name'))

    eps = itertools.chain.from_iterable(
        dist.entry_points for dist in first_per_name(distributions())
    )
    return SelectableGroups.load(eps).select(**params)
def files(distribution_name):
    """Return the files that make up the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: List of files composing the distribution.
    """
    dist = distribution(distribution_name)
    return dist.files
def requires(distribution_name):
    """
    Return the requirements declared by the named package.

    :return: An iterator of requirements, suitable for
        packaging.requirement.Requirement.
    """
    dist = distribution(distribution_name)
    return dist.requires
def packages_distributions() -> Mapping[str, List[str]]:
    """
    Return a mapping of top-level packages to their
    distributions.

    >>> import collections.abc
    >>> pkgs = packages_distributions()
    >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values())
    True
    """
    pkg_to_dist = {}
    for dist in distributions():
        # Prefer the declared top-level names; infer them from the file list
        # only when none are declared.
        top_level = _top_level_declared(dist) or _top_level_inferred(dist)
        for pkg in top_level:
            pkg_to_dist.setdefault(pkg, []).append(dist.metadata['Name'])
    return pkg_to_dist
def _top_level_declared(dist):
return (dist.read_text('top_level.txt') or '').split()
def _top_level_inferred(dist):
    """
    Infer top-level names from the distribution's ``.py`` files: the first
    path component for nested files, the suffix-less file name otherwise.
    """
    inferred = set()
    for f in always_iterable(dist.files):
        if f.suffix != ".py":
            continue
        if len(f.parts) > 1:
            inferred.add(f.parts[0])
        else:
            inferred.add(f.with_suffix('').name)
    return inferred
================================================
FILE: metaflow/_vendor/v3_7/importlib_metadata/_adapters.py
================================================
import re
import textwrap
import email.message
from ._text import FoldedCase
class Message(email.message.Message):
    """
    ``email.message.Message`` adapter that takes over the state of an
    existing message, repairs its headers and offers a JSON-style view.
    """

    # Keys that may be indicated multiple times per PEP 566.
    multiple_use_keys = {
        FoldedCase(key)
        for key in (
            'Classifier',
            'Obsoletes-Dist',
            'Platform',
            'Project-URL',
            'Provides-Dist',
            'Provides-Extra',
            'Requires-Dist',
            'Requires-External',
            'Supported-Platform',
            'Dynamic',
        )
    }

    def __new__(cls, orig: email.message.Message):
        # Adopt the instance attributes of ``orig`` instead of re-parsing.
        adopted = super().__new__(cls)
        vars(adopted).update(vars(orig))
        return adopted

    def __init__(self, *args, **kwargs):
        # The base initializer is deliberately not invoked; state was copied
        # in __new__ and only the headers are rewritten here.
        self._headers = self._repair_headers()

    # suppress spurious error from mypy
    def __iter__(self):
        return super().__iter__()

    def _repair_headers(self):
        """
        Return the header list with multi-line values de-indented and,
        when a payload is present, a trailing ``Description`` entry.
        """

        def redent(value):
            "Correct for RFC822 indentation"
            if value and '\n' in value:
                return textwrap.dedent(' ' * 8 + value)
            return value

        headers = []
        for key, value in vars(self)['_headers']:
            headers.append((key, redent(value)))
        if self._payload:
            headers.append(('Description', self.get_payload()))
        return headers

    @property
    def json(self):
        """
        Convert PackageMetadata to a JSON-compatible format
        per PEP 0566.
        """

        def transform(key):
            if key in self.multiple_use_keys:
                value = self.get_all(key)
            else:
                value = self[key]
            if key == 'Keywords':
                value = re.split(r'\s+', value)
            tk = key.lower().replace('-', '_')
            return tk, value

        return dict(transform(FoldedCase(key)) for key in self)
================================================
FILE: metaflow/_vendor/v3_7/importlib_metadata/_collections.py
================================================
import collections
# from jaraco.collections 3.3
class FreezableDefaultDict(collections.defaultdict):
    """
    A ``defaultdict`` that can be frozen: once ``freeze()`` has been
    called, looking up a missing key still returns a fresh default but
    no longer stores it, so the mapping stops growing (useful to
    prevent mutation during iteration).

    >>> dd = FreezableDefaultDict(list)
    >>> dd[0].append('1')
    >>> dd.freeze()
    >>> dd[1]
    []
    >>> len(dd)
    1
    """

    def __missing__(self, key):
        try:
            frozen_lookup = self._frozen
        except AttributeError:
            # Not frozen yet: regular defaultdict behaviour (stores the value).
            return super().__missing__(key)
        return frozen_lookup(key)

    def freeze(self):
        def produce_default(key):
            return self.default_factory()

        self._frozen = produce_default
class Pair(collections.namedtuple('Pair', 'name value')):
    """A ``(name, value)`` pair parsed from ``name = value`` text."""

    @classmethod
    def parse(cls, text):
        # Split on the first "=" only and trim whitespace around both halves.
        pieces = [piece.strip() for piece in text.split("=", 1)]
        return cls(*pieces)
================================================
FILE: metaflow/_vendor/v3_7/importlib_metadata/_compat.py
================================================
import sys
import platform
__all__ = ['install', 'NullFinder', 'Protocol']
try:
from typing import Protocol
except ImportError: # pragma: no cover
from metaflow._vendor.v3_7.typing_extensions import Protocol # type: ignore
def install(cls):
    """
    Class decorator for installation on sys.meta_path.

    Appends an instance of the decorated class to sys.meta_path and
    then attempts to disable the finder functionality of the stdlib
    DistributionFinder. The class itself is returned unchanged.
    """
    finder = cls()
    sys.meta_path.append(finder)
    disable_stdlib_finder()
    return cls
def disable_stdlib_finder():
    """
    Give the backport primacy for discovering path-based distributions
    by monkey-patching the stdlib O_O.

    Every entry on sys.meta_path that comes from
    ``_frozen_importlib_external`` and exposes ``find_distributions``
    has that attribute deleted.

    See #91 for more background for rationale on this sketchy
    behavior.
    """

    def is_stdlib_distribution_finder(candidate):
        origin = getattr(candidate, '__module__', None)
        if origin != '_frozen_importlib_external':
            return False
        return hasattr(candidate, 'find_distributions')

    for finder in sys.meta_path:  # pragma: nocover
        if is_stdlib_distribution_finder(finder):
            del finder.find_distributions
class NullFinder:
    """
    A meta path "Finder" that never locates any module; it exists so
    that subclasses can sit on sys.meta_path and find distributions.
    """

    @staticmethod
    def find_spec(*args, **kwargs):
        # Accept anything, find nothing.
        return None

    # Python 2's import system required finders to provide find_module();
    # Python 3 deprecates it in favour of find_spec(). Since this finder
    # offers no import functionality at all, one implementation serves both.
    find_module = find_spec
def pypy_partial(val):
    """
    Adjust for variable stacklevel on partial under PyPy.

    Returns ``val`` incremented by one on PyPy and ``val`` itself on
    every other implementation.

    Workaround for #327.
    """
    running_on_pypy = platform.python_implementation() == 'PyPy'
    return val + running_on_pypy
================================================
FILE: metaflow/_vendor/v3_7/importlib_metadata/_functools.py
================================================
import types
import functools
# from jaraco.functools 3.3
def method_cache(method, cache_wrapper=None):
    """
    Memoize a method with the cache stored on each instance.

    On the first call through an instance, the method is bound to that
    instance, wrapped with ``cache_wrapper`` (``functools.lru_cache()``
    unless another wrapper is given) and stored on the instance under
    the method's own name, so later calls go straight to the cached
    bound method.

    >>> class MyClass:
    ...     calls = 0
    ...
    ...     @method_cache
    ...     def method(self, value):
    ...         self.calls += 1
    ...         return value

    >>> a = MyClass()
    >>> a.method(3)
    3
    >>> for x in range(75):
    ...     res = a.method(x)
    >>> a.calls
    75

    The behaviour matches ``lru_cache`` except that each instance owns
    its cache: values cached for one instance never evict values of
    another, and the cache disappears together with the instance.

    >>> b = MyClass()
    >>> for x in range(35):
    ...     res = b.method(x)
    >>> b.calls
    35
    >>> a.method(0)
    0
    >>> a.calls
    75

    Had ``method`` been decorated with ``functools.lru_cache()`` directly,
    ``a.calls`` would have been 76, because the ``b`` instance would
    have flushed the cached value for 0.

    Clear the cache with ``.cache_clear()``

    >>> a.method.cache_clear()

    Same for a method that hasn't yet been called.

    >>> c = MyClass()
    >>> c.method.cache_clear()

    Another cache wrapper may be supplied:

    >>> cache = functools.lru_cache(maxsize=2)
    >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache)
    >>> a = MyClass()
    >>> a.method2()
    3

    Caution - do not subsequently wrap the method with another decorator, such
    as ``@property``, which changes the semantics of the function.

    See also
    http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/
    for another implementation and additional justification.
    """
    if not cache_wrapper:
        cache_wrapper = functools.lru_cache()

    def wrapper(self, *args, **kwargs):
        # First call on this instance: shadow the class-level wrapper with a
        # cached, bound method stored on the instance itself.
        bound = types.MethodType(method, self)
        memoized = cache_wrapper(bound)
        setattr(self, method.__name__, memoized)
        return memoized(*args, **kwargs)

    def nothing_to_clear():
        return None

    # Support cache clear even before cache has been created.
    wrapper.cache_clear = nothing_to_clear

    return wrapper
# From jaraco.functools 3.3
def pass_none(func):
    """
    Wrap func so that it is skipped (and None returned) whenever its
    first argument is None.

    >>> print_text = pass_none(print)
    >>> print_text('text')
    text
    >>> print_text(None)
    """

    @functools.wraps(func)
    def wrapper(param, *args, **kwargs):
        if param is None:
            return None
        return func(param, *args, **kwargs)

    return wrapper
================================================
FILE: metaflow/_vendor/v3_7/importlib_metadata/_itertools.py
================================================
from itertools import filterfalse
def unique_everseen(iterable, key=None):
    "Yield unique elements in first-seen order, remembering every element seen."
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    seen = set()
    for element in iterable:
        # Without a key function the element itself is the identity marker.
        marker = element if key is None else key(element)
        if marker in seen:
            continue
        seen.add(marker)
        yield element
# copied from more_itertools 8.8
def always_iterable(obj, base_type=(str, bytes)):
    """Return an iterator for *obj*, whatever *obj* is.

    An iterable *obj* is iterated over directly::

        >>> obj = (1, 2, 3)
        >>> list(always_iterable(obj))
        [1, 2, 3]

    A non-iterable *obj* is wrapped in a one-item iterable::

        >>> obj = 1
        >>> list(always_iterable(obj))
        [1]

    ``None`` gives an empty iterable:

        >>> obj = None
        >>> list(always_iterable(None))
        []

    By default, binary and text strings are not considered iterable::

        >>> obj = 'foo'
        >>> list(always_iterable(obj))
        ['foo']

    If *base_type* is set, objects for which ``isinstance(obj, base_type)``
    returns ``True`` won't be considered iterable.

        >>> obj = {'a': 1}
        >>> list(always_iterable(obj))  # Iterate over the dict's keys
        ['a']
        >>> list(always_iterable(obj, base_type=dict))  # Treat dicts as a unit
        [{'a': 1}]

    Set *base_type* to ``None`` to avoid any special handling and treat objects
    Python considers iterable as iterable:

        >>> obj = 'foo'
        >>> list(always_iterable(obj, base_type=None))
        ['f', 'o', 'o']
    """
    if obj is None:
        return iter(())

    treat_as_unit = (base_type is not None) and isinstance(obj, base_type)
    if not treat_as_unit:
        try:
            return iter(obj)
        except TypeError:
            # Not iterable after all; fall through and wrap it.
            pass

    return iter((obj,))
================================================
FILE: metaflow/_vendor/v3_7/importlib_metadata/_meta.py
================================================
from ._compat import Protocol
from typing import Any, Dict, Iterator, List, TypeVar, Union
_T = TypeVar("_T")
class PackageMetadata(Protocol):
    """Structural type describing the metadata object of a distribution."""

    def __len__(self) -> int:
        """Number of entries."""
        ...  # pragma: no cover

    def __contains__(self, item: str) -> bool:
        """Whether ``item`` is present."""
        ...  # pragma: no cover

    def __getitem__(self, key: str) -> str:
        """Value stored under ``key``."""
        ...  # pragma: no cover

    def __iter__(self) -> Iterator[str]:
        """Iterate over the entries as strings."""
        ...  # pragma: no cover

    def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]:
        """
        Return all values associated with a possibly multi-valued key.
        """

    @property
    def json(self) -> Dict[str, Union[str, List[str]]]:
        """
        A JSON-compatible form of the metadata.
        """
class SimplePath(Protocol):
    """
    A minimal subset of pathlib.Path required by PathDistribution.

    The members are declared the way PathDistribution uses them:
    ``joinpath`` and ``/`` take the segment to append, ``parent`` is an
    attribute rather than a method, and ``read_text`` accepts an
    ``encoding`` argument.
    """

    def joinpath(self, other: Union[str, 'SimplePath']) -> 'SimplePath':
        """Return a path with ``other`` appended."""
        ...  # pragma: no cover

    def __truediv__(self, other: Union[str, 'SimplePath']) -> 'SimplePath':
        """Return a path with ``other`` appended (``self / other``)."""
        ...  # pragma: no cover

    @property
    def parent(self) -> 'SimplePath':
        """The containing path."""
        ...  # pragma: no cover

    def read_text(self, encoding: Union[str, None] = None) -> str:
        """Return the content of the path decoded as text."""
        ...  # pragma: no cover
================================================
FILE: metaflow/_vendor/v3_7/importlib_metadata/_text.py
================================================
import re
from ._functools import method_cache
# from jaraco.text 3.5
class FoldedCase(str):
    """
    A ``str`` subclass whose comparisons, hashing, containment test,
    ``index`` and ``split`` ignore case; everything else behaves like
    a plain ``str``.

    >>> s = FoldedCase('hello world')

    >>> s == 'Hello World'
    True

    >>> 'Hello World' == s
    True

    >>> s != 'Hello World'
    False

    >>> s.index('O')
    4

    >>> s.split('O')
    ['hell', ' w', 'rld']

    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
    ['alpha', 'Beta', 'GAMMA']

    Sequence membership is straightforward.

    >>> "Hello World" in [s]
    True
    >>> s in ["Hello World"]
    True

    You may test for set inclusion, but candidate and elements
    must both be folded.

    >>> FoldedCase("Hello World") in {s}
    True
    >>> s in {FoldedCase("Hello World")}
    True

    String inclusion works as long as the FoldedCase object
    is on the right.

    >>> "hello" in FoldedCase("Hello World")
    True

    But not if the FoldedCase object is on the left:

    >>> FoldedCase('hello') in 'Hello World'
    False

    In that case, use in_:

    >>> FoldedCase('hello').in_('Hello World')
    True

    >>> FoldedCase('hello') > FoldedCase('Hello')
    False
    """

    def __lt__(self, other):
        mine, theirs = self.lower(), other.lower()
        return mine < theirs

    def __gt__(self, other):
        mine, theirs = self.lower(), other.lower()
        return mine > theirs

    def __eq__(self, other):
        mine, theirs = self.lower(), other.lower()
        return mine == theirs

    def __ne__(self, other):
        mine, theirs = self.lower(), other.lower()
        return mine != theirs

    def __hash__(self):
        # Hash the folded form so equal-ignoring-case values share a bucket.
        folded = self.lower()
        return hash(folded)

    def __contains__(self, other):
        haystack = super().lower()
        return other.lower() in haystack

    def in_(self, other):
        "Does self appear in other?"
        container = FoldedCase(other)
        return self in container

    # cache lower since it's likely to be called frequently.
    @method_cache
    def lower(self):
        return super().lower()

    def index(self, sub):
        folded = self.lower()
        return folded.index(sub.lower())

    def split(self, splitter=' ', maxsplit=0):
        # The splitter is matched literally, ignoring case.
        return re.split(re.escape(splitter), self, maxsplit=maxsplit, flags=re.I)
================================================
FILE: metaflow/_vendor/v3_7/importlib_metadata/py.typed
================================================
================================================
FILE: metaflow/_vendor/v3_7/importlib_metadata.LICENSE
================================================
Copyright 2017-2019 Jason R. Coombs, Barry Warsaw
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: metaflow/_vendor/v3_7/typeguard/__init__.py
================================================
import os
from typing import Any
from ._checkers import TypeCheckerCallable as TypeCheckerCallable
from ._checkers import TypeCheckLookupCallback as TypeCheckLookupCallback
from ._checkers import check_type_internal as check_type_internal
from ._checkers import checker_lookup_functions as checker_lookup_functions
from ._checkers import load_plugins as load_plugins
from ._config import CollectionCheckStrategy as CollectionCheckStrategy
from ._config import ForwardRefPolicy as ForwardRefPolicy
from ._config import TypeCheckConfiguration as TypeCheckConfiguration
from ._decorators import typechecked as typechecked
from ._decorators import typeguard_ignore as typeguard_ignore
from ._exceptions import InstrumentationWarning as InstrumentationWarning
from ._exceptions import TypeCheckError as TypeCheckError
from ._exceptions import TypeCheckWarning as TypeCheckWarning
from ._exceptions import TypeHintWarning as TypeHintWarning
from ._functions import TypeCheckFailCallback as TypeCheckFailCallback
from ._functions import check_type as check_type
from ._functions import warn_on_error as warn_on_error
from ._importhook import ImportHookManager as ImportHookManager
from ._importhook import TypeguardFinder as TypeguardFinder
from ._importhook import install_import_hook as install_import_hook
from ._memo import TypeCheckMemo as TypeCheckMemo
from ._suppression import suppress_type_checks as suppress_type_checks
from ._utils import Unset as Unset
# Re-export imports so they look like they live directly in this package:
# anything whose __module__ points at one of this package's submodules is
# relabelled as belonging to the package itself.
for value in list(locals().values()):
    if not getattr(value, "__module__", "").startswith(f"{__name__}."):
        continue
    value.__module__ = __name__


# Declared for type checkers only; the value is supplied lazily by the
# module-level __getattr__.
config: TypeCheckConfiguration
def __getattr__(name: str) -> Any:
    """
    Module-level attribute hook: resolve ``config`` to the global
    configuration object on demand and reject every other name.
    """
    if name != "config":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Imported here rather than at module import time.
    from ._config import global_config

    return global_config
# Automatically load checker lookup functions unless explicitly disabled.
# Only the presence of the variable matters, not its value.
_autoload_disabled = "TYPEGUARD_DISABLE_PLUGIN_AUTOLOAD" in os.environ
if not _autoload_disabled:
    load_plugins()
del _autoload_disabled
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_checkers.py
================================================
from __future__ import annotations
import collections.abc
import inspect
import sys
import types
import typing
import warnings
from enum import Enum
from inspect import Parameter, isclass, isfunction
from io import BufferedIOBase, IOBase, RawIOBase, TextIOBase
from textwrap import indent
from typing import (
IO,
AbstractSet,
Any,
BinaryIO,
Callable,
Dict,
ForwardRef,
List,
Mapping,
MutableMapping,
NewType,
Optional,
Sequence,
Set,
TextIO,
Tuple,
Type,
TypeVar,
Union,
)
from unittest.mock import Mock
try:
from metaflow._vendor.v3_7 import typing_extensions
except ImportError:
typing_extensions = None # type: ignore[assignment]
from ._config import ForwardRefPolicy
from ._exceptions import TypeCheckError, TypeHintWarning
from ._memo import TypeCheckMemo
from ._utils import evaluate_forwardref, get_stacklevel, get_type_name, qualified_name
if sys.version_info >= (3, 11):
from typing import (
Annotated,
TypeAlias,
get_args,
get_origin,
get_type_hints,
is_typeddict,
)
SubclassableAny = Any
else:
from metaflow._vendor.v3_7.typing_extensions import (
Annotated,
TypeAlias,
get_args,
get_origin,
get_type_hints,
is_typeddict,
)
from metaflow._vendor.v3_7.typing_extensions import Any as SubclassableAny
if sys.version_info >= (3, 10):
from importlib.metadata import entry_points
from typing import ParamSpec
else:
from metaflow._vendor.v3_7.importlib_metadata import entry_points
from metaflow._vendor.v3_7.typing_extensions import ParamSpec
# Signature shared by the check_* functions in this module:
# (value, origin_type, args, memo).
TypeCheckerCallable: TypeAlias = Callable[
[Any, Any, Tuple[Any, ...], TypeCheckMemo], Any
]
# A lookup callback returns a checker callable or None.
# NOTE(review): the meaning of its three positional arguments is not
# visible in this part of the file - confirm against the callers.
TypeCheckLookupCallback: TypeAlias = Callable[
[Any, Tuple[Any, ...], Tuple[Any, ...]], Optional[TypeCheckerCallable]
]
# Registry of lookup callbacks; starts out empty.
checker_lookup_functions: list[TypeCheckLookupCallback] = []
# Sentinel used to tell "key absent" apart from a stored value such as None
_missing = object()
# Lifted from mypy.sharedparse
BINARY_MAGIC_METHODS = {
    # Plain arithmetic and bitwise operators
    "__add__", "__sub__", "__mul__", "__matmul__", "__truediv__",
    "__floordiv__", "__div__", "__mod__", "__divmod__", "__pow__",
    "__lshift__", "__rshift__", "__and__", "__or__", "__xor__",
    # In-place variants
    "__iadd__", "__isub__", "__imul__", "__imatmul__", "__itruediv__",
    "__ifloordiv__", "__idiv__", "__imod__", "__ipow__",
    "__ilshift__", "__irshift__", "__iand__", "__ior__", "__ixor__",
    # Reflected variants
    "__radd__", "__rsub__", "__rmul__", "__rmatmul__", "__rtruediv__",
    "__rfloordiv__", "__rdiv__", "__rmod__", "__rpow__",
    "__rlshift__", "__rrshift__", "__rand__", "__ror__", "__rxor__",
    # Comparisons
    "__cmp__", "__eq__", "__ne__", "__lt__", "__le__", "__gt__", "__ge__",
}
def check_callable(
value: Any,
origin_type: Any,
args: tuple[Any, ...],
memo: TypeCheckMemo,
) -> None:
if not callable(value):
raise TypeCheckError("is not callable")
if args:
try:
signature = inspect.signature(value)
except (TypeError, ValueError):
return
argument_types = args[0]
if isinstance(argument_types, list) and not any(
type(item) is ParamSpec for item in argument_types
):
# The callable must not have keyword-only arguments without defaults
unfulfilled_kwonlyargs = [
param.name
for param in signature.parameters.values()
if param.kind == Parameter.KEYWORD_ONLY
and param.default == Parameter.empty
]
if unfulfilled_kwonlyargs:
raise TypeCheckError(
f"has mandatory keyword-only arguments in its declaration: "
f'{", ".join(unfulfilled_kwonlyargs)}'
)
num_mandatory_args = len(
[
param.name
for param in signature.parameters.values()
if param.kind
in (Parameter.POSITIONAL_ONLY, Parameter.POSITIONAL_OR_KEYWORD)
and param.default is Parameter.empty
]
)
has_varargs = any(
param
for param in signature.parameters.values()
if param.kind == Parameter.VAR_POSITIONAL
)
if num_mandatory_args > len(argument_types):
raise TypeCheckError(
f"has too many arguments in its declaration; expected "
f"{len(argument_types)} but {num_mandatory_args} argument(s) "
f"declared"
)
elif not has_varargs and num_mandatory_args < len(argument_types):
raise TypeCheckError(
f"has too few arguments in its declaration; expected "
f"{len(argument_types)} but {num_mandatory_args} argument(s) "
f"declared"
)
def check_mapping(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against a dict / mutable mapping / mapping annotation
    and, when key and value types are given and not both ``Any``, check
    the sampled items against them.

    :raises TypeCheckError: on a container or item mismatch
    """
    wants_dict = origin_type is Dict or origin_type is dict
    wants_mutable = (
        origin_type is MutableMapping
        or origin_type is collections.abc.MutableMapping
    )

    if wants_dict and not isinstance(value, dict):
        raise TypeCheckError("is not a dict")

    if wants_mutable:
        if not isinstance(value, collections.abc.MutableMapping):
            raise TypeCheckError("is not a mutable mapping")
    elif not isinstance(value, collections.abc.Mapping):
        raise TypeCheckError("is not a mapping")

    if not args:
        return

    key_type, value_type = args
    if key_type is Any and value_type is Any:
        return

    strategy = memo.config.collection_check_strategy
    for k, v in strategy.iterate_samples(value.items()):
        try:
            check_type_internal(k, key_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"key {k!r}")
            raise

        try:
            check_type_internal(v, value_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"value of key {k!r}")
            raise
def check_typed_dict(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is a dict whose keys and values agree with the
    TypedDict class ``origin_type``.

    :raises TypeCheckError: if ``value`` is not a dict, has undeclared
        keys, lacks required keys, or holds a value of the wrong type
    """
    if not isinstance(value, dict):
        raise TypeCheckError("is not a dict")

    declared_keys = frozenset(origin_type.__annotations__)
    try:
        required_keys = origin_type.__required_keys__
    except AttributeError:  # py3.8 and lower
        required_keys = declared_keys if origin_type.__total__ else frozenset()

    existing_keys = frozenset(value)

    extra_keys = existing_keys - declared_keys
    if extra_keys:
        quoted = (f'"{key}"' for key in sorted(extra_keys, key=repr))
        keys_formatted = ", ".join(quoted)
        raise TypeCheckError(f"has unexpected extra key(s): {keys_formatted}")

    missing_keys = required_keys - existing_keys
    if missing_keys:
        quoted = (f'"{key}"' for key in sorted(missing_keys, key=repr))
        keys_formatted = ", ".join(quoted)
        raise TypeCheckError(f"is missing required key(s): {keys_formatted}")

    for key, argtype in get_type_hints(origin_type).items():
        argvalue = value.get(key, _missing)
        if argvalue is _missing:
            # Optional key that is not present: nothing to check.
            continue
        try:
            check_type_internal(argvalue, argtype, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"value of key {key!r}")
            raise
def check_list(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is a list and, when an item type other than
    ``Any`` is given, that the sampled items match it.

    :raises TypeCheckError: on a container or item mismatch
    """
    if not isinstance(value, list):
        raise TypeCheckError("is not a list")

    if not args or args == (Any,):
        return

    item_type = args[0]
    samples = memo.config.collection_check_strategy.iterate_samples(value)
    for position, item in enumerate(samples):
        try:
            check_type_internal(item, item_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"item {position}")
            raise
def check_sequence(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is a sequence and, when an item type other than
    ``Any`` is given, that the sampled items match it.

    :raises TypeCheckError: on a container or item mismatch
    """
    if not isinstance(value, collections.abc.Sequence):
        raise TypeCheckError("is not a sequence")

    if not args or args == (Any,):
        return

    item_type = args[0]
    samples = memo.config.collection_check_strategy.iterate_samples(value)
    for position, item in enumerate(samples):
        try:
            check_type_internal(item, item_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"item {position}")
            raise
def check_set(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is a set (a ``frozenset`` when that is the
    origin type) and, when an item type other than ``Any`` is given,
    that the sampled items match it.

    :raises TypeCheckError: on a container or item mismatch
    """
    if origin_type is frozenset:
        if not isinstance(value, frozenset):
            raise TypeCheckError("is not a frozenset")
    elif not isinstance(value, AbstractSet):
        raise TypeCheckError("is not a set")

    if not args or args == (Any,):
        return

    item_type = args[0]
    for v in memo.config.collection_check_strategy.iterate_samples(value):
        try:
            check_type_internal(v, item_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"[{v}]")
            raise
def check_tuple(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against a tuple annotation.

    Named tuples (origin types carrying field annotations) are checked
    field by field. Plain tuples are checked against ``args``: a trailing
    ``Ellipsis`` means a homogeneous variable-length tuple, ``((),)``
    means the empty tuple, anything else is a fixed-length tuple.

    :raises TypeCheckError: on a container, length or element mismatch
    """
    # Specialized check for NamedTuples
    field_types = getattr(origin_type, "__annotations__", None)
    if field_types is None and sys.version_info < (3, 8):
        field_types = getattr(origin_type, "_field_types", None)

    if field_types:
        if not isinstance(value, origin_type):
            raise TypeCheckError(
                f"is not a named tuple of type {qualified_name(origin_type)}"
            )

        for name, field_type in field_types.items():
            try:
                check_type_internal(getattr(value, name), field_type, memo)
            except TypeCheckError as exc:
                exc.append_path_element(f"attribute {name!r}")
                raise

        return

    if not isinstance(value, tuple):
        raise TypeCheckError("is not a tuple")

    if not args:
        # Unparametrized Tuple or plain tuple
        return

    use_ellipsis = args[-1] is Ellipsis
    tuple_params = args[:-1] if use_ellipsis else args[:]

    if use_ellipsis:
        element_type = tuple_params[0]
        samples = memo.config.collection_check_strategy.iterate_samples(value)
        for i, element in enumerate(samples):
            try:
                check_type_internal(element, element_type, memo)
            except TypeCheckError as exc:
                exc.append_path_element(f"item {i}")
                raise
        return

    if tuple_params == ((),):
        if value != ():
            raise TypeCheckError("is not an empty tuple")
        return

    expected_length = len(tuple_params)
    if len(value) != expected_length:
        raise TypeCheckError(
            f"has wrong number of elements (expected {expected_length}, got "
            f"{len(value)} instead)"
        )

    for i, (element, element_type) in enumerate(zip(value, tuple_params)):
        try:
            check_type_internal(element, element_type, memo)
        except TypeCheckError as exc:
            exc.append_path_element(f"item {i}")
            raise
def check_union(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` matches at least one member of the union.

    :raises TypeCheckError: listing the failure for every member when
        none of them matches
    """
    errors: dict[str, TypeCheckError] = {}
    for type_ in args:
        try:
            check_type_internal(value, type_, memo)
        except TypeCheckError as exc:
            errors[get_type_name(type_)] = exc
        else:
            # The first matching member settles the check.
            return

    report_lines = [f"{key}: {error}" for key, error in errors.items()]
    formatted_errors = indent("\n".join(report_lines), " ")
    raise TypeCheckError(f"did not match any element in the union:\n{formatted_errors}")
def check_uniontype(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` matches at least one member of the union type.

    :raises TypeCheckError: listing the failure for every member when
        none of them matches
    """
    errors: dict[str, TypeCheckError] = {}
    for type_ in args:
        try:
            check_type_internal(value, type_, memo)
        except TypeCheckError as exc:
            errors[get_type_name(type_)] = exc
        else:
            # The first matching member settles the check.
            return

    report_lines = [f"{key}: {error}" for key, error in errors.items()]
    formatted_errors = indent("\n".join(report_lines), " ")
    raise TypeCheckError(f"did not match any element in the union:\n{formatted_errors}")
def check_class(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is a class and, when an expected class is given
    in ``args[0]``, that it satisfies it. The expected class may be a
    forward reference, ``Any``, a protocol, a type variable, a union or
    an ordinary class.

    :raises TypeCheckError: if ``value`` is not a class or does not
        satisfy the expected class
    """
    if not isclass(value):
        raise TypeCheckError("is not a class")

    # Needed on Python 3.7+
    if not args:
        return

    target = args[0]
    if isinstance(target, ForwardRef):
        expected_class = evaluate_forwardref(target, memo)
    else:
        expected_class = target

    if expected_class is Any:
        return

    if getattr(expected_class, "_is_protocol", False):
        check_protocol(value, expected_class, (), memo)
        return

    if isinstance(expected_class, TypeVar):
        check_typevar(value, expected_class, (), memo, subclass_check=True)
        return

    if get_origin(expected_class) is Union:
        errors: dict[str, TypeCheckError] = {}
        for arg in get_args(expected_class):
            if arg is Any:
                return

            try:
                check_class(value, type, (arg,), memo)
            except TypeCheckError as exc:
                errors[get_type_name(arg)] = exc
            else:
                return

        # Every member was tried and none accepted the class.
        formatted_errors = indent(
            "\n".join(f"{key}: {error}" for key, error in errors.items()), " "
        )
        raise TypeCheckError(
            f"did not match any element in the union:\n{formatted_errors}"
        )

    if not issubclass(value, expected_class):
        raise TypeCheckError(f"is not a subclass of {qualified_name(expected_class)}")
def check_newtype(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """Check ``value`` against the supertype of the ``NewType`` ``origin_type``."""
    supertype = origin_type.__supertype__
    check_type_internal(value, supertype, memo)
def check_instance(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is an instance of ``origin_type``.

    :raises TypeCheckError: if it is not
    """
    if isinstance(value, origin_type):
        return

    raise TypeCheckError(f"is not an instance of {qualified_name(origin_type)}")
def check_typevar(
    value: Any,
    origin_type: TypeVar,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
    *,
    subclass_check: bool = False,
) -> None:
    """
    Check ``value`` against the bound or the constraints of a type variable.

    A type variable with neither a bound nor constraints accepts everything.

    :param subclass_check: when true, the bound/constraints are wrapped in
        ``Type[...]`` so that ``value`` is checked as a class rather than as an
        instance
    :raises TypeCheckError: if the bound check fails, or if none of the constraints
        accepts ``value``
    """
    bound = origin_type.__bound__
    if bound is not None:
        annotation = Type[bound] if subclass_check else bound
        check_type_internal(value, annotation, memo)
        return

    constraints = origin_type.__constraints__
    if not constraints:
        return

    for constraint in constraints:
        annotation = Type[constraint] if subclass_check else constraint
        try:
            check_type_internal(value, annotation, memo)
        except TypeCheckError:
            continue

        # One satisfied constraint is enough
        return

    formatted_constraints = ", ".join(
        get_type_name(constraint) for constraint in constraints
    )
    raise TypeCheckError(
        f"does not match any of the constraints " f"({formatted_constraints})"
    )
# Choose the ``Literal`` detection helper once, at import time, based on which of
# ``typing.Literal`` and ``typing_extensions.Literal`` can exist in this environment.
if sys.version_info < (3, 8):

    def _is_literal_type(typ: object) -> bool:
        """Return ``True`` if ``typ`` is ``typing_extensions.Literal``."""
        return typ is typing_extensions.Literal

elif typing_extensions is not None:

    def _is_literal_type(typ: object) -> bool:
        """Return ``True`` if ``typ`` is either flavor of ``Literal``."""
        return typ is typing.Literal or typ is typing_extensions.Literal

else:

    def _is_literal_type(typ: object) -> bool:
        """Return ``True`` if ``typ`` is ``typing.Literal``."""
        return typ is typing.Literal
def check_literal(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check that ``value`` is one of the values listed in a ``Literal[...]``.

    A literal argument only matches when it has exactly the same type as ``value``
    and compares equal to it, so ``Literal[1]`` does not accept ``True`` and
    ``Literal[0, False]`` accepts both ``0`` and ``False``.

    :param value: the value being checked
    :param origin_type: the origin of the annotation (not consulted here)
    :param args: the arguments of the ``Literal``; nested literals are flattened
    :param memo: unused by this checker
    :raises TypeError: if the annotation itself contains an illegal literal value
    :raises TypeCheckError: if ``value`` matches none of the literal values
    """

    def get_literal_args(literal_args: tuple[Any, ...]) -> tuple[Any, ...]:
        retval: list[Any] = []
        for arg in literal_args:
            if _is_literal_type(get_origin(arg)):
                # Nested Literal[...]: flatten its arguments into the result
                retval.extend(get_literal_args(arg.__args__))
            elif arg is None or isinstance(arg, (int, str, bytes, bool, Enum)):
                retval.append(arg)
            else:
                raise TypeError(
                    f"Illegal literal value: {arg}"
                )  # TypeError here is deliberate

        return tuple(retval)

    final_args = tuple(get_literal_args(args))

    # Scan every candidate instead of stopping at the first ``==`` hit: 0 == False
    # and 1 == True, so a first-hit lookup would pick the wrong element for
    # annotations such as Literal[0, False]. Testing the type first also avoids
    # calling ``==`` on values of unrelated types.
    for candidate in final_args:
        if type(candidate) is type(value) and (
            candidate is value or candidate == value
        ):
            return

    formatted_args = ", ".join(repr(arg) for arg in final_args)
    raise TypeCheckError(f"is not any of ({formatted_args})") from None
def check_literal_string(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check a ``LiteralString`` annotation by checking ``value`` against ``str``.

    The check is delegated to :func:`check_type_internal`.
    """
    runtime_type = str
    check_type_internal(value, runtime_type, memo)
def check_typeguard(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check a ``TypeGuard[...]`` annotation by checking ``value`` against ``bool``.

    The check is delegated to :func:`check_type_internal`.
    """
    runtime_type = bool
    check_type_internal(value, runtime_type, memo)
def check_none(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Accept only ``None``.

    :raises TypeCheckError: if ``value`` is anything other than ``None``
    """
    if value is None:
        return

    raise TypeCheckError("is not None")
def check_number(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against ``complex`` or ``float``, also accepting the narrower
    numeric types.

    ``complex`` accepts ``complex``, ``float`` and ``int`` instances; ``float``
    accepts ``float`` and ``int`` instances. Any other ``origin_type`` is not
    checked here.

    :raises TypeCheckError: if ``value`` is not an instance of an accepted type
    """
    if origin_type is complex:
        if not isinstance(value, (complex, float, int)):
            raise TypeCheckError("is neither complex, float or int")
    elif origin_type is float:
        if not isinstance(value, (float, int)):
            raise TypeCheckError("is neither float or int")
def check_io(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against ``TextIO``, ``BinaryIO`` or ``IO[...]``.

    ``TextIO`` and ``IO[str]`` require a ``TextIOBase`` instance; ``BinaryIO`` and
    ``IO[bytes]`` require a ``RawIOBase`` or ``BufferedIOBase`` instance; anything
    else requires an ``IOBase`` instance.

    :raises TypeCheckError: if ``value`` is not an instance of the required base
    """
    if origin_type is TextIO or (origin_type is IO and args == (str,)):
        if isinstance(value, TextIOBase):
            return

        raise TypeCheckError("is not a text based I/O object")

    if origin_type is BinaryIO or (origin_type is IO and args == (bytes,)):
        if isinstance(value, (RawIOBase, BufferedIOBase)):
            return

        raise TypeCheckError("is not a binary I/O object")

    if not isinstance(value, IOBase):
        raise TypeCheckError("is not an I/O object")
def check_protocol(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against a protocol class.

    Protocols flagged with ``_is_runtime_protocol`` are checked with
    ``isinstance``. For any other protocol a warning is emitted and the value is
    not checked.

    :raises TypeCheckError: if ``value`` fails the ``isinstance`` test against a
        runtime protocol
    """
    # TODO: implement proper compatibility checking and support non-runtime protocols
    if not getattr(origin_type, "_is_runtime_protocol", False):
        warnings.warn(
            f"Typeguard cannot check the {origin_type.__qualname__} protocol because "
            f"it is a non-runtime protocol. If you would like to type check this "
            f"protocol, please use @typing.runtime_checkable",
            stacklevel=get_stacklevel(),
        )
        return

    if not isinstance(value, origin_type):
        raise TypeCheckError(
            f"is not compatible with the {origin_type.__qualname__} protocol"
        )
def check_byteslike(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Accept ``bytearray``, ``bytes`` and ``memoryview`` instances.

    :raises TypeCheckError: if ``value`` is an instance of none of those types
    """
    if isinstance(value, (bytearray, bytes, memoryview)):
        return

    raise TypeCheckError("is not bytes-like")
def check_self(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Check ``value`` against ``Self`` using the self type recorded in ``memo``.

    A class value must be a subclass of ``memo.self_type``; any other value must
    be an instance of it.

    :param value: the value being checked
    :param origin_type: the origin of the annotation (not consulted here)
    :param args: unused by this checker
    :param memo: supplies ``self_type``
    :raises TypeCheckError: if ``memo.self_type`` is ``None``, or if ``value`` is
        neither a subclass nor an instance of the self type, as applicable
    """
    if memo.self_type is None:
        raise TypeCheckError("cannot be checked against Self outside of a method call")

    if isclass(value):
        if not issubclass(value, memo.self_type):
            # This branch performs a subclass test, so the message says so
            raise TypeCheckError(
                f"is not a subclass of the self type "
                f"({qualified_name(memo.self_type)})"
            )
    elif not isinstance(value, memo.self_type):
        raise TypeCheckError(
            f"is not an instance of the self type ({qualified_name(memo.self_type)})"
        )
def check_paramspec(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """Accept any value; parameter specifications are not checked for now."""
    return None  # No-op for now
def check_instanceof(
    value: Any,
    origin_type: Any,
    args: tuple[Any, ...],
    memo: TypeCheckMemo,
) -> None:
    """
    Accept ``value`` only if ``isinstance(value, origin_type)`` holds.

    :raises TypeCheckError: if the ``isinstance`` test fails
    """
    if isinstance(value, origin_type):
        return

    raise TypeCheckError(f"is not an instance of {qualified_name(origin_type)}")
def check_type_internal(
    value: Any,
    annotation: Any,
    memo: TypeCheckMemo,
) -> None:
    """
    Check that the given object is compatible with the given type annotation.

    This function should only be used by type checker callables. Applications should use
    :func:`~.check_type` instead.

    The annotation is first resolved (forward references, ``Annotated``), then each
    function in ``checker_lookup_functions`` is asked for a checker; the first
    checker returned is called and ends the check. If no lookup function returns a
    checker, a plain ``isinstance`` test is used when the annotation is a class.

    :param value: the value to check
    :param annotation: the type annotation to check against
    :param memo: a memo object containing configuration and information necessary for
        looking up forward references
    :raises TypeCheckError: if the fallback ``isinstance`` test fails (checkers
        called from here raise it as well)
    :raises NameError: if a forward reference cannot be resolved and the configured
        policy is ``ForwardRefPolicy.ERROR``
    """
    if isinstance(annotation, ForwardRef):
        try:
            annotation = evaluate_forwardref(annotation, memo)
        except NameError:
            if memo.config.forward_ref_policy is ForwardRefPolicy.ERROR:
                raise
            elif memo.config.forward_ref_policy is ForwardRefPolicy.WARN:
                warnings.warn(
                    f"Cannot resolve forward reference {annotation.__forward_arg__!r}",
                    TypeHintWarning,
                    stacklevel=get_stacklevel(),
                )

            # Under any policy other than ERROR the value is left unchecked
            return

    # Mock instances are accepted for any annotation
    if annotation is Any or annotation is SubclassableAny or isinstance(value, Mock):
        return

    # Skip type checks if value is an instance of a class that inherits from Any
    if not isclass(value) and SubclassableAny in type(value).__bases__:
        return

    extras: tuple[Any, ...]
    origin_type = get_origin(annotation)
    if origin_type is Annotated:
        # Unwrap Annotated[T, ...]: check against T and keep the metadata as extras
        annotation, *extras_ = get_args(annotation)
        extras = tuple(extras_)
        origin_type = get_origin(annotation)
    else:
        extras = ()

    if origin_type is not None:
        args = get_args(annotation)

        # Compatibility hack to distinguish between unparametrized and empty tuple
        # (tuple[()]), necessary due to https://github.com/python/cpython/issues/91137
        if origin_type in (tuple, Tuple) and annotation is not Tuple and not args:
            args = ((),)
    else:
        # No origin: the annotation itself is what the lookup functions receive
        origin_type = annotation
        args = ()

    # The first lookup function that returns a checker decides the outcome
    for lookup_func in checker_lookup_functions:
        checker = lookup_func(origin_type, args, extras)
        if checker:
            checker(value, origin_type, args, memo)
            return

    # Fallback when no lookup function returned a checker
    if isclass(origin_type):
        if not isinstance(value, origin_type):
            raise TypeCheckError(f"is not an instance of {qualified_name(origin_type)}")
    elif type(origin_type) is str:  # noqa: E721
        warnings.warn(
            f"Skipping type check against {origin_type!r}; this looks like a "
            f"string-form forward reference imported from another module",
            TypeHintWarning,
            stacklevel=get_stacklevel(),
        )
# Equality checks are applied to these
# Maps an annotation's origin type to the checker used for it; consulted first by
# ``builtin_checker_lookup`` via ``dict.get``.
origin_type_checkers = {
    bytes: check_byteslike,
    AbstractSet: check_set,
    BinaryIO: check_io,
    Callable: check_callable,
    collections.abc.Callable: check_callable,
    complex: check_number,
    dict: check_mapping,
    Dict: check_mapping,
    float: check_number,
    frozenset: check_set,
    IO: check_io,
    list: check_list,
    List: check_list,
    Mapping: check_mapping,
    MutableMapping: check_mapping,
    None: check_none,
    collections.abc.Mapping: check_mapping,
    collections.abc.MutableMapping: check_mapping,
    Sequence: check_sequence,
    collections.abc.Sequence: check_sequence,
    collections.abc.Set: check_set,
    set: check_set,
    Set: check_set,
    TextIO: check_io,
    tuple: check_tuple,
    Tuple: check_tuple,
    type: check_class,
    Type: check_class,
    Union: check_union,
}
# Entries below are registered only on interpreter versions where the referenced
# attribute is accessed, guarded by the version checks
if sys.version_info >= (3, 8):
    origin_type_checkers[typing.Literal] = check_literal
if sys.version_info >= (3, 10):
    origin_type_checkers[types.UnionType] = check_uniontype
    origin_type_checkers[typing.TypeGuard] = check_typeguard
if sys.version_info >= (3, 11):
    origin_type_checkers.update(
        {typing.LiteralString: check_literal_string, typing.Self: check_self}
    )
if typing_extensions is not None:
    # On some Python versions, these may simply be re-exports from typing,
    # but exactly which Python versions is subject to change,
    # so it's best to err on the safe side
    # and update the dictionary on all Python versions
    # if typing_extensions is installed
    origin_type_checkers[typing_extensions.Literal] = check_literal
    origin_type_checkers[typing_extensions.LiteralString] = check_literal_string
    origin_type_checkers[typing_extensions.Self] = check_self
    origin_type_checkers[typing_extensions.TypeGuard] = check_typeguard
def builtin_checker_lookup(
    origin_type: Any, args: tuple[Any, ...], extras: tuple[Any, ...]
) -> TypeCheckerCallable | None:
    """
    Return the built-in checker for ``origin_type``, or ``None`` if there is none.

    The ``origin_type_checkers`` table is consulted first; after that, the type is
    matched against typed dicts, ``Tuple`` subclasses, protocols, parameter
    specifications, type variables and both ``NewType`` flavors, in that order.

    :param origin_type: the (origin of the) annotation being checked
    :param args: the annotation's arguments (not consulted here)
    :param extras: ``Annotated`` metadata (not consulted here)
    """
    registered = origin_type_checkers.get(origin_type)
    if registered is not None:
        return registered

    if is_typeddict(origin_type):
        return check_typed_dict

    if isclass(origin_type) and issubclass(
        origin_type, Tuple  # type: ignore[arg-type]
    ):
        # NamedTuple
        return check_tuple

    if getattr(origin_type, "_is_protocol", False):
        return check_protocol

    if isinstance(origin_type, ParamSpec):
        return check_paramspec

    if isinstance(origin_type, TypeVar):
        return check_typevar

    if origin_type.__class__ is NewType:
        # typing.NewType on Python 3.10+
        return check_newtype

    if (
        isfunction(origin_type)
        and getattr(origin_type, "__module__", None) == "typing"
        and getattr(origin_type, "__qualname__", "").startswith("NewType.")
        and hasattr(origin_type, "__supertype__")
    ):
        # typing.NewType on Python 3.9 and below
        return check_newtype

    return None


checker_lookup_functions.append(builtin_checker_lookup)
def load_plugins() -> None:
    """
    Load all type checker lookup functions from entry points.

    Every entry point in the ``typeguard.checker_lookup`` group is loaded, and each
    loaded callable is inserted at the front of
    :data:`typeguard.checker_lookup_functions`. An entry point that fails to load,
    or that yields a non-callable object, is skipped with a warning.

    .. note:: This function is called implicitly on import, unless the
        ``TYPEGUARD_DISABLE_PLUGIN_AUTOLOAD`` environment variable is present.
    """
    for ep in entry_points(group="typeguard.checker_lookup"):
        try:
            plugin = ep.load()
        except Exception as exc:
            warnings.warn(
                f"Failed to load plugin {ep.name!r}: " f"{qualified_name(exc)}: {exc}",
                stacklevel=2,
            )
        else:
            if callable(plugin):
                checker_lookup_functions.insert(0, plugin)
            else:
                warnings.warn(
                    f"Plugin {ep} returned a non-callable object: {plugin!r}",
                    stacklevel=2,
                )
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_config.py
================================================
from __future__ import annotations
from collections.abc import Collection
from dataclasses import dataclass
from enum import Enum, auto
from typing import TYPE_CHECKING, TypeVar
if TYPE_CHECKING:
from ._functions import TypeCheckFailCallback
T = TypeVar("T")
class ForwardRefPolicy(Enum):
    """
    Defines how unresolved forward references are handled.

    Members:

    * ``ERROR``: propagate the :exc:`NameError` when the forward reference lookup fails
    * ``WARN``: emit a :class:`~.TypeHintWarning` if the forward reference lookup fails
    * ``IGNORE``: silently skip checks for unresolvable forward references
    """

    ERROR = auto()
    WARN = auto()
    IGNORE = auto()
class CollectionCheckStrategy(Enum):
    """
    Specifies how thoroughly the contents of collections are type checked.

    This has an effect on the following built-in checkers:

    * ``AbstractSet``
    * ``Dict``
    * ``List``
    * ``Mapping``
    * ``Set``
    * ``Tuple[<type>, ...]`` (arbitrarily sized tuples)

    Members:

    * ``FIRST_ITEM``: check only the first item
    * ``ALL_ITEMS``: check all items
    """

    FIRST_ITEM = auto()
    ALL_ITEMS = auto()

    def iterate_samples(self, collection: Collection[T]) -> Collection[T]:
        """
        Return the items of ``collection`` that should be checked.

        For ``FIRST_ITEM`` this is a one-element list holding the first item
        produced by iterating the collection, or an empty tuple if the collection
        is empty. For any other member the collection itself is returned.
        """
        if self is not CollectionCheckStrategy.FIRST_ITEM:
            return collection

        if not len(collection):
            return ()

        first_item = next(iter(collection))
        return [first_item]
@dataclass
class TypeCheckConfiguration:
    """
    You can change Typeguard's behavior with these settings.

    .. attribute:: typecheck_fail_callback
       :type: Callable[[TypeCheckError, TypeCheckMemo], Any]

       Callable that is called when type checking fails.

       Default: ``None`` (the :exc:`~.TypeCheckError` is raised directly)

    .. attribute:: forward_ref_policy
       :type: ForwardRefPolicy

       Specifies what to do when a forward reference fails to resolve.

       Default: ``WARN``

    .. attribute:: collection_check_strategy
       :type: CollectionCheckStrategy

       Specifies how thoroughly the contents of collections (list, dict, etc.) are
       type checked.

       Default: ``FIRST_ITEM``

    .. attribute:: debug_instrumentation
       :type: bool

       If set to ``True``, the code of modules or functions instrumented by typeguard
       is printed to ``sys.stderr`` after the instrumentation is done

       Requires Python 3.9 or newer.

       Default: ``False``
    """

    # NOTE: the field order below is also the positional order of the generated
    # ``__init__``; it differs from the order used in the docstring above.
    forward_ref_policy: ForwardRefPolicy = ForwardRefPolicy.WARN
    typecheck_fail_callback: TypeCheckFailCallback | None = None
    collection_check_strategy: CollectionCheckStrategy = (
        CollectionCheckStrategy.FIRST_ITEM
    )
    debug_instrumentation: bool = False


# Module-level configuration instance created with all defaults
global_config = TypeCheckConfiguration()
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_decorators.py
================================================
from __future__ import annotations
import ast
import inspect
import sys
from collections.abc import Sequence
from functools import partial
from inspect import isclass, isfunction
from types import CodeType, FrameType, FunctionType
from typing import TYPE_CHECKING, Any, Callable, ForwardRef, TypeVar, cast, overload
from warnings import warn
from ._config import CollectionCheckStrategy, ForwardRefPolicy, global_config
from ._exceptions import InstrumentationWarning
from ._functions import TypeCheckFailCallback
from ._transformer import TypeguardTransformer
from ._utils import Unset, function_name, get_stacklevel, is_method_of, unset
if TYPE_CHECKING:
    # This branch is only seen by static type checkers
    from typeshed.stdlib.types import _Cell

    _F = TypeVar("_F")

    def typeguard_ignore(f: _F) -> _F:
        """This decorator is a noop during static type-checking."""
        return f

else:
    # At runtime ``typeguard_ignore`` is an alias of ``typing.no_type_check``
    from typing import no_type_check as typeguard_ignore  # noqa: F401
T_CallableOrType = TypeVar("T_CallableOrType", bound=Callable[..., Any])
def make_cell(value: object) -> _Cell:
    """Return a closure cell whose contents are ``value``."""

    def capture() -> object:
        return value

    # ``capture`` closes over exactly one variable, so its only cell holds ``value``
    return capture.__closure__[0]  # type: ignore[index]
def find_target_function(
    new_code: CodeType, target_path: Sequence[str], firstlineno: int
) -> CodeType | None:
    """
    Search the constants of ``new_code`` for the code object of a target function.

    :param new_code: the code object whose ``co_consts`` are searched
    :param target_path: names leading to the target; the first name is matched at
        this level and the rest are matched recursively inside matching code objects
    :param firstlineno: the ``co_firstlineno`` the target code object must have
    :return: the matching code object, or ``None`` if there is none
    """
    head, remainder = target_path[0], target_path[1:]
    for const in new_code.co_consts:
        if not isinstance(const, CodeType) or const.co_name != head:
            continue

        if const.co_firstlineno == firstlineno:
            return const

        if remainder:
            # Same name but a different line: look for the rest of the path inside
            nested = find_target_function(const, remainder, firstlineno)
            if nested:
                return nested

    return None
def instrument(f: T_CallableOrType) -> FunctionType | str:
    """
    Recompile ``f`` with type checking code injected by ``TypeguardTransformer``.

    The source of the module that defines ``f`` is parsed, transformed and
    compiled, and the code object of the target function is extracted from the
    result to build a replacement function.

    :param f: the function to instrument
    :return: the instrumented function, or a string explaining why ``f`` could not
        be instrumented
    """
    if not getattr(f, "__code__", None):
        return "no code associated"
    elif not getattr(f, "__module__", None):
        return "__module__ attribute is not set"
    elif f.__code__.co_filename == "<stdin>":
        # Code typed into the interactive interpreter has no retrievable source
        return "cannot instrument functions defined in a REPL"
    elif hasattr(f, "__wrapped__"):
        return (
            "@typechecked only supports instrumenting functions wrapped with "
            "@classmethod, @staticmethod or @property"
        )

    # ``__qualname__`` of a nested function contains "<locals>" markers
    # (e.g. "outer.<locals>.inner"); they are not names and must be dropped
    target_path = [item for item in f.__qualname__.split(".") if item != "<locals>"]
    module_source = inspect.getsource(sys.modules[f.__module__])
    module_ast = ast.parse(module_source)
    instrumentor = TypeguardTransformer(target_path, f.__code__.co_firstlineno)
    instrumentor.visit(module_ast)

    if not instrumentor.target_node or instrumentor.target_lineno is None:
        return "instrumentor did not find the target function"

    module_code = compile(module_ast, f.__code__.co_filename, "exec", dont_inherit=True)
    new_code = find_target_function(
        module_code, target_path, instrumentor.target_lineno
    )
    if not new_code:
        return "cannot find the target function in the AST"

    if global_config.debug_instrumentation and sys.version_info >= (3, 9):
        # Unparse the instrumented AST node back to source and print it to stderr
        print(
            f"Source code of {f.__qualname__}() after instrumentation:"
            "\n----------------------------------------------",
            file=sys.stderr,
        )
        print(ast.unparse(instrumentor.target_node), file=sys.stderr)
        print(
            "----------------------------------------------",
            file=sys.stderr,
        )

    closure = f.__closure__
    if new_code.co_freevars != f.__code__.co_freevars:
        # Create a new closure and find values for the new free variables.
        # The locals are read from the frame two levels above this call.
        frame = cast(FrameType, inspect.currentframe())
        frame = cast(FrameType, frame.f_back)
        frame_locals = cast(FrameType, frame.f_back).f_locals
        cells: list[_Cell] = []
        for key in new_code.co_freevars:
            if key in instrumentor.names_used_in_annotations:
                # Find the value and make a new cell from it; a missing (or falsy)
                # value is replaced with a forward reference to the name
                value = frame_locals.get(key) or ForwardRef(key)
                cells.append(make_cell(value))
            else:
                # Reuse the cell from the existing closure
                assert f.__closure__
                cells.append(f.__closure__[f.__code__.co_freevars.index(key)])

        closure = tuple(cells)

    # Build the replacement and copy the original function's metadata onto it
    new_function = FunctionType(new_code, f.__globals__, f.__name__, closure=closure)
    new_function.__module__ = f.__module__
    new_function.__name__ = f.__name__
    new_function.__qualname__ = f.__qualname__
    new_function.__annotations__ = f.__annotations__
    new_function.__doc__ = f.__doc__
    new_function.__defaults__ = f.__defaults__
    new_function.__kwdefaults__ = f.__kwdefaults__
    return new_function
@overload
def typechecked(
    *,
    forward_ref_policy: ForwardRefPolicy | Unset = unset,
    typecheck_fail_callback: TypeCheckFailCallback | Unset = unset,
    collection_check_strategy: CollectionCheckStrategy | Unset = unset,
    debug_instrumentation: bool | Unset = unset,
) -> Callable[[T_CallableOrType], T_CallableOrType]:
    ...


@overload
def typechecked(target: T_CallableOrType) -> T_CallableOrType:
    ...


def typechecked(
    target: T_CallableOrType | None = None,
    *,
    forward_ref_policy: ForwardRefPolicy | Unset = unset,
    typecheck_fail_callback: TypeCheckFailCallback | Unset = unset,
    collection_check_strategy: CollectionCheckStrategy | Unset = unset,
    debug_instrumentation: bool | Unset = unset,
) -> Any:
    """
    Instrument the target function to perform run-time type checking.

    This decorator recompiles the target function, injecting code to type check
    arguments, return values, yield values (excluding ``yield from``) and assignments to
    annotated local variables.

    This can also be used as a class decorator. This will instrument all type annotated
    methods, including :func:`@classmethod <classmethod>`,
    :func:`@staticmethod <staticmethod>`, and :class:`@property <property>` decorated
    methods in the class.

    If a function cannot be instrumented, an :exc:`InstrumentationWarning` is emitted
    and the decorated object is returned unchanged.

    .. note:: When Python is run in optimized mode (``-O`` or ``-OO``, this decorator
        is a no-op). This is a feature meant for selectively introducing type checking
        into a code base where the checks aren't meant to be run in production.

    :param target: the function or class to enable type checking for
    :param forward_ref_policy: override for
        :attr:`.TypeCheckConfiguration.forward_ref_policy`
    :param typecheck_fail_callback: override for
        :attr:`.TypeCheckConfiguration.typecheck_fail_callback`
    :param collection_check_strategy: override for
        :attr:`.TypeCheckConfiguration.collection_check_strategy`
    :param debug_instrumentation: override for
        :attr:`.TypeCheckConfiguration.debug_instrumentation`
    """
    # NOTE(review): in this function the four override arguments are only forwarded
    # to the ``partial`` below; nothing else in this body reads them -- confirm
    # whether they are meant to reach the instrumentation step.
    if target is None:
        return partial(
            typechecked,
            forward_ref_policy=forward_ref_policy,
            typecheck_fail_callback=typecheck_fail_callback,
            collection_check_strategy=collection_check_strategy,
            debug_instrumentation=debug_instrumentation,
        )

    if not __debug__:
        return target

    if isclass(target):
        for key, attr in target.__dict__.items():
            if is_method_of(attr, target):
                retval = instrument(attr)
                if isfunction(retval):
                    setattr(target, key, retval)
            elif isinstance(attr, (classmethod, staticmethod)):
                if is_method_of(attr.__func__, target):
                    retval = instrument(attr.__func__)
                    if isfunction(retval):
                        # Re-wrap with the same descriptor type as the original
                        wrapper = attr.__class__(retval)
                        setattr(target, key, wrapper)
            elif isinstance(attr, property):
                kwargs: dict[str, Any] = dict(doc=attr.__doc__)
                for name in ("fset", "fget", "fdel"):
                    property_func = kwargs[name] = getattr(attr, name)
                    if is_method_of(property_func, target):
                        retval = instrument(property_func)
                        if isfunction(retval):
                            kwargs[name] = retval

                setattr(target, key, attr.__class__(**kwargs))

        return target

    # Keep the object exactly as it was passed in, so that it can be handed back
    # untouched (wrapper included) if instrumentation is refused
    decorated = target

    # Find either the first Python wrapper or the actual function
    wrapper_class: type[classmethod[Any, Any, Any]] | type[
        staticmethod[Any, Any]
    ] | None = None
    if isinstance(target, (classmethod, staticmethod)):
        wrapper_class = target.__class__
        target = target.__func__

    retval = instrument(target)
    if isinstance(retval, str):
        warn(
            f"{retval} -- not typechecking {function_name(target)}",
            InstrumentationWarning,
            stacklevel=get_stacklevel(),
        )
        return decorated

    if wrapper_class is None:
        return retval
    else:
        return wrapper_class(retval)
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_exceptions.py
================================================
from collections import deque
from typing import Deque
class TypeHintWarning(UserWarning):
    """
    A warning that is emitted when a type hint in string form could not be resolved to
    an actual type.
    """
class TypeCheckWarning(UserWarning):
    """Emitted by typeguard's type checkers when a type mismatch is detected."""

    def __init__(self, message: str):
        # Exactly one message argument is accepted and passed on unchanged
        super().__init__(message)
class InstrumentationWarning(UserWarning):
    """Emitted when there's a problem with instrumenting a function for type checks."""

    def __init__(self, message: str):
        # Exactly one message argument is accepted and passed on unchanged
        super().__init__(message)
class TypeCheckError(Exception):
    """
    Raised by typeguard's type checkers when a type mismatch is detected.

    Path elements added with :meth:`append_path_element` are joined with ``" of "``
    and placed in front of the message when the error is converted to a string.
    """

    def __init__(self, message: str):
        super().__init__(message)
        self._path: Deque[str] = deque()

    def append_path_element(self, element: str) -> None:
        """Add ``element`` to the end of the path shown before the message."""
        self._path.append(element)

    def __str__(self) -> str:
        message = str(self.args[0])
        if not self._path:
            return message

        location = " of ".join(self._path)
        return location + " " + message
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_functions.py
================================================
from __future__ import annotations
import sys
import warnings
from typing import Any, Callable, NoReturn, TypeVar, Union, overload
from . import _suppression
from ._checkers import BINARY_MAGIC_METHODS, check_type_internal
from ._config import (
CollectionCheckStrategy,
ForwardRefPolicy,
TypeCheckConfiguration,
)
from ._exceptions import TypeCheckError, TypeCheckWarning
from ._memo import TypeCheckMemo
from ._utils import get_stacklevel, qualified_name
if sys.version_info >= (3, 11):
from typing import Literal, Never, TypeAlias
else:
from metaflow._vendor.v3_7.typing_extensions import Literal, Never, TypeAlias
T = TypeVar("T")
TypeCheckFailCallback: TypeAlias = Callable[[TypeCheckError, TypeCheckMemo], Any]
@overload
def check_type(
    value: object,
    expected_type: type[T],
    *,
    forward_ref_policy: ForwardRefPolicy = ...,
    typecheck_fail_callback: TypeCheckFailCallback | None = ...,
    collection_check_strategy: CollectionCheckStrategy = ...,
) -> T:
    ...


@overload
def check_type(
    value: object,
    expected_type: Any,
    *,
    forward_ref_policy: ForwardRefPolicy = ...,
    typecheck_fail_callback: TypeCheckFailCallback | None = ...,
    collection_check_strategy: CollectionCheckStrategy = ...,
) -> Any:
    ...


def check_type(
    value: object,
    expected_type: Any,
    *,
    forward_ref_policy: ForwardRefPolicy = TypeCheckConfiguration().forward_ref_policy,
    typecheck_fail_callback: (TypeCheckFailCallback | None) = (
        TypeCheckConfiguration().typecheck_fail_callback
    ),
    collection_check_strategy: CollectionCheckStrategy = (
        TypeCheckConfiguration().collection_check_strategy
    ),
) -> Any:
    """
    Ensure that ``value`` matches ``expected_type``.

    The types from the :mod:`typing` module do not support :func:`isinstance` or
    :func:`issubclass` so a number of type specific checks are required. This function
    knows which checker to call for which type.

    This function wraps :func:`~.check_type_internal` in the following ways:

    * Respects type checking suppression (:func:`~.suppress_type_checks`)
    * Forms a :class:`~.TypeCheckMemo` from the current stack frame
    * Calls the configured type check fail callback if the check fails

    Note that this function is independent of the globally shared configuration in
    :data:`typeguard.config`. This means that usage within libraries is safe from being
    affected by configuration changes made by other libraries or by the integrating
    application. Instead, configuration options have the same default values as their
    corresponding fields in :class:`TypeCheckConfiguration`.

    :param value: value to be checked against ``expected_type``
    :param expected_type: a class or generic type instance, or a tuple of such things
    :param forward_ref_policy: see :attr:`TypeCheckConfiguration.forward_ref_policy`
    :param typecheck_fail_callback:
        see :attr:`TypeCheckConfiguration.typecheck_fail_callback`
    :param collection_check_strategy:
        see :attr:`TypeCheckConfiguration.collection_check_strategy`
    :return: ``value``, unmodified
    :raises TypeCheckError: if there is a type mismatch

    """
    # A tuple of types is treated as a union of its members
    if type(expected_type) is tuple:
        expected_type = Union[expected_type]

    if _suppression.type_checks_suppressed or expected_type is Any:
        return value

    config = TypeCheckConfiguration(
        forward_ref_policy=forward_ref_policy,
        typecheck_fail_callback=typecheck_fail_callback,
        collection_check_strategy=collection_check_strategy,
    )

    # The memo is built from the globals and locals of the calling frame
    caller = sys._getframe(1)
    memo = TypeCheckMemo(caller.f_globals, caller.f_locals, config=config)
    try:
        check_type_internal(value, expected_type, memo)
    except TypeCheckError as exc:
        exc.append_path_element(qualified_name(value, add_class_prefix=True))
        handler = config.typecheck_fail_callback
        if not handler:
            raise

        handler(exc, memo)

    return value
def check_argument_types(
    func_name: str,
    arguments: dict[str, tuple[Any, Any]],
    memo: TypeCheckMemo,
) -> Literal[True]:
    """
    Check each argument value against its annotation.

    Failures are passed to ``memo.config.typecheck_fail_callback`` when one is set;
    otherwise they are raised.

    :param func_name: name of the called function, used in error messages
    :param arguments: mapping of argument name to a ``(value, annotation)`` pair
    :param memo: supplies the configuration and is passed to the checkers
    :return: always ``True``
    :raises TypeCheckError: on a mismatch, or when an argument is annotated with
        ``NoReturn``/``Never``, if no callback is configured
    """
    if _suppression.type_checks_suppressed:
        return True

    for argname, (value, annotation) in arguments.items():
        if annotation is NoReturn or annotation is Never:
            never_called = TypeCheckError(
                f"{func_name}() was declared never to be called but it was"
            )
            handler = memo.config.typecheck_fail_callback
            if not handler:
                raise never_called

            handler(never_called, memo)

        try:
            check_type_internal(value, annotation, memo)
        except TypeCheckError as exc:
            qualname = qualified_name(value, add_class_prefix=True)
            exc.append_path_element(f'argument "{argname}" ({qualname})')
            handler = memo.config.typecheck_fail_callback
            if not handler:
                raise

            handler(exc, memo)

    return True
def check_return_type(
    func_name: str,
    retval: T,
    annotation: Any,
    memo: TypeCheckMemo,
) -> T:
    """
    Check a function's return value against its return annotation.

    Failures are passed to ``memo.config.typecheck_fail_callback`` when one is set;
    otherwise they are raised.

    :param func_name: name of the function, used in error messages and to
        recognize binary magic methods
    :param retval: the value being returned
    :param annotation: the return annotation
    :param memo: supplies the configuration and is passed to the checkers
    :return: ``retval``, unmodified
    :raises TypeCheckError: on a mismatch, or when the function is annotated with
        ``NoReturn``/``Never``, if no callback is configured
    """
    if _suppression.type_checks_suppressed:
        return retval

    if annotation is NoReturn or annotation is Never:
        never_returns = TypeCheckError(
            f"{func_name}() was declared never to return but it did"
        )
        handler = memo.config.typecheck_fail_callback
        if not handler:
            raise never_returns

        handler(never_returns, memo)

    try:
        check_type_internal(retval, annotation, memo)
    except TypeCheckError as exc:
        # Allow NotImplemented if this is a binary magic method (__eq__() et al)
        if retval is NotImplemented and annotation is bool:
            # This does not (and cannot) check if it's actually a method
            func_name = func_name.rsplit(".", 1)[-1]
            if func_name in BINARY_MAGIC_METHODS:
                return retval

        qualname = qualified_name(retval, add_class_prefix=True)
        exc.append_path_element(f"the return value ({qualname})")
        handler = memo.config.typecheck_fail_callback
        if not handler:
            raise

        handler(exc, memo)

    return retval
def check_send_type(
    func_name: str,
    sendval: T,
    annotation: Any,
    memo: TypeCheckMemo,
) -> T:
    """
    Check a value sent to a generator against the generator's send annotation.

    Failures are passed to ``memo.config.typecheck_fail_callback`` when one is set;
    otherwise they are raised.

    :param func_name: name of the generator function, used in error messages
    :param sendval: the value being sent
    :param annotation: the annotation for sent values
    :param memo: supplies the configuration and is passed to the checkers
    :return: ``sendval``, unmodified
    :raises TypeCheckError: on a mismatch, or when the send type is
        ``NoReturn``/``Never``, if no callback is configured
    """
    if _suppression.type_checks_suppressed:
        return sendval

    if annotation is NoReturn or annotation is Never:
        never_sent = TypeCheckError(
            f"{func_name}() was declared never to be sent a value to but it was"
        )
        handler = memo.config.typecheck_fail_callback
        if not handler:
            raise never_sent

        handler(never_sent, memo)

    try:
        check_type_internal(sendval, annotation, memo)
    except TypeCheckError as exc:
        qualname = qualified_name(sendval, add_class_prefix=True)
        exc.append_path_element(f"the value sent to generator ({qualname})")
        handler = memo.config.typecheck_fail_callback
        if not handler:
            raise

        handler(exc, memo)

    return sendval
def check_yield_type(
    func_name: str,
    yieldval: T,
    annotation: Any,
    memo: TypeCheckMemo,
) -> T:
    """
    Check a value yielded by a generator against the generator's yield annotation.

    Failures are passed to ``memo.config.typecheck_fail_callback`` when one is set;
    otherwise they are raised.

    :param func_name: name of the generator function, used in error messages
    :param yieldval: the value being yielded
    :param annotation: the annotation for yielded values
    :param memo: supplies the configuration and is passed to the checkers
    :return: ``yieldval``, unmodified
    :raises TypeCheckError: on a mismatch, or when the yield type is
        ``NoReturn``/``Never``, if no callback is configured
    """
    if _suppression.type_checks_suppressed:
        return yieldval

    if annotation is NoReturn or annotation is Never:
        never_yields = TypeCheckError(
            f"{func_name}() was declared never to yield but it did"
        )
        handler = memo.config.typecheck_fail_callback
        if not handler:
            raise never_yields

        handler(never_yields, memo)

    try:
        check_type_internal(yieldval, annotation, memo)
    except TypeCheckError as exc:
        qualname = qualified_name(yieldval, add_class_prefix=True)
        exc.append_path_element(f"the yielded value ({qualname})")
        handler = memo.config.typecheck_fail_callback
        if not handler:
            raise

        handler(exc, memo)

    return yieldval
def check_variable_assignment(
    value: object, varname: str, annotation: Any, memo: TypeCheckMemo
) -> Any:
    """
    Check a value being assigned to an annotated variable.

    A failure is passed to ``memo.config.typecheck_fail_callback`` when one is set;
    otherwise it is raised.

    :param value: the value being assigned
    :param varname: name of the variable, used in the error message
    :param annotation: the variable's annotation
    :param memo: supplies the configuration and is passed to the checkers
    :return: ``value``, unmodified
    :raises TypeCheckError: on a mismatch, if no callback is configured
    """
    if _suppression.type_checks_suppressed:
        return value

    try:
        check_type_internal(value, annotation, memo)
    except TypeCheckError as exc:
        qualname = qualified_name(value, add_class_prefix=True)
        exc.append_path_element(f"value assigned to {varname} ({qualname})")
        handler = memo.config.typecheck_fail_callback
        if not handler:
            raise

        handler(exc, memo)

    return value
def check_multi_variable_assignment(
    value: Any, targets: list[dict[str, Any]], memo: TypeCheckMemo
) -> Any:
    """
    Check the values of an assignment that has several targets.

    ``value`` is treated as a single value when every target mapping has exactly
    one entry; otherwise it is unpacked into a list. A variable name starting with
    ``*`` receives the slice of values left over after reserving one value for
    each variable that follows it.

    :param value: the right-hand side of the assignment
    :param targets: one mapping of variable name to annotation per assignment target
    :param memo: supplies the configuration and is passed to the checkers
    :return: the single value if exactly one value was collected, otherwise the
        list of collected values
    :raises TypeCheckError: on a mismatch, if no callback is configured
    """
    widest_target = max(len(target) for target in targets)
    iterated_values = [value] if widest_target == 1 else list(value)

    if not _suppression.type_checks_suppressed:
        for expected_types in targets:
            cursor = 0
            for position, (varname, expected_type) in enumerate(
                expected_types.items()
            ):
                if varname.startswith("*"):
                    varname = varname[1:]
                    # Leave one value for every variable after the starred one
                    remaining_names = len(expected_types) - 1 - position
                    stop = len(iterated_values) - remaining_names
                    obj: object = iterated_values[cursor:stop]
                    cursor = stop
                else:
                    obj = iterated_values[cursor]
                    cursor += 1

                try:
                    check_type_internal(obj, expected_type, memo)
                except TypeCheckError as exc:
                    qualname = qualified_name(obj, add_class_prefix=True)
                    exc.append_path_element(f"value assigned to {varname} ({qualname})")
                    handler = memo.config.typecheck_fail_callback
                    if not handler:
                        raise

                    handler(exc, memo)

    if len(iterated_values) == 1:
        return iterated_values[0]

    return iterated_values
def warn_on_error(exc: TypeCheckError, memo: TypeCheckMemo) -> None:
    """
    Emit a warning on a type mismatch.

    The error's string form becomes the message of a :class:`TypeCheckWarning`.

    This is intended to be used as an error handler in
    :attr:`TypeCheckConfiguration.typecheck_fail_callback`.

    """
    warning = TypeCheckWarning(str(exc))
    warnings.warn(warning, stacklevel=get_stacklevel())
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_importhook.py
================================================
from __future__ import annotations
import ast
import sys
import types
from collections.abc import Callable, Iterable
from importlib.abc import MetaPathFinder
from importlib.machinery import ModuleSpec, SourceFileLoader
from importlib.util import cache_from_source, decode_source
from inspect import isclass
from os import PathLike
from types import CodeType, ModuleType, TracebackType
from typing import Sequence, TypeVar
from unittest.mock import patch
from ._config import global_config
from ._transformer import TypeguardTransformer
if sys.version_info >= (3, 12):
from collections.abc import Buffer
else:
from metaflow._vendor.v3_7.typing_extensions import Buffer
if sys.version_info >= (3, 11):
from typing import ParamSpec
else:
from metaflow._vendor.v3_7.typing_extensions import ParamSpec
if sys.version_info >= (3, 10):
from importlib.metadata import PackageNotFoundError, version
else:
from metaflow._vendor.v3_7.importlib_metadata import PackageNotFoundError, version
# Marker later passed as the ``optimization`` argument of ``cache_from_source``
# (see ``optimized_cache_from_source``). It is "typeguard" followed by the first
# three dot-separated components of the installed typeguard version joined
# without separators, or plain "typeguard" when no such distribution is found.
try:
    OPTIMIZATION = "typeguard" + "".join(version("typeguard").split(".")[:3])
except PackageNotFoundError:
    OPTIMIZATION = "typeguard"
# Type variables used to annotate pass-through callables in this module
P = ParamSpec("P")
T = TypeVar("T")
# The name of this function is magical
def _call_with_frames_removed(
f: Callable[P, T], *args: P.args, **kwargs: P.kwargs
) -> T:
return f(*args, **kwargs)
def optimized_cache_from_source(path: str, debug_override: bool | None = None) -> str:
    """
    Variant of :func:`importlib.util.cache_from_source` that always passes the
    module-level ``OPTIMIZATION`` marker as the ``optimization`` argument.

    :param path: path of the source file
    :param debug_override: forwarded unchanged to ``cache_from_source``
    :return: whatever ``cache_from_source`` returns for these arguments
    """
    cache_path = cache_from_source(path, debug_override, optimization=OPTIMIZATION)
    return cache_path
class TypeguardLoader(SourceFileLoader):
    """
    Source file loader that runs the module's AST through
    :class:`TypeguardTransformer` before compiling it.
    """

    @staticmethod
    def source_to_code(
        data: Buffer | str | ast.Module | ast.Expression | ast.Interactive,
        path: Buffer | str | PathLike[str] = "<string>",
    ) -> CodeType:
        """
        Compile ``data`` into a code object after instrumenting it.

        :param data: source text, source bytes, or an already parsed AST
        :param path: file name recorded in the compiled code; defaults to
            ``"<string>"`` like importlib's own ``source_to_code``
        :return: the compiled, instrumented code object
        """
        if isinstance(data, (ast.Module, ast.Expression, ast.Interactive)):
            tree = data
        else:
            if isinstance(data, str):
                source = data
            else:
                source = decode_source(data)

            tree = _call_with_frames_removed(
                ast.parse,
                source,
                path,
                "exec",
            )

        tree = TypeguardTransformer().visit(tree)
        ast.fix_missing_locations(tree)

        # ast.unparse() is only called on Python 3.9 and later
        if global_config.debug_instrumentation and sys.version_info >= (3, 9):
            print(
                f"Source code of {path!r} after instrumentation:\n"
                "----------------------------------------------",
                file=sys.stderr,
            )
            print(ast.unparse(tree), file=sys.stderr)
            print("----------------------------------------------", file=sys.stderr)

        return _call_with_frames_removed(
            compile, tree, path, "exec", 0, dont_inherit=True
        )

    def exec_module(self, module: ModuleType) -> None:
        """Execute ``module`` while bytecode cache paths carry the typeguard marker."""
        # Use a custom optimization marker – the import lock should make this monkey
        # patch safe
        with patch(
            "importlib._bootstrap_external.cache_from_source",
            optimized_cache_from_source,
        ):
            super().exec_module(module)
class TypeguardFinder(MetaPathFinder):
    """
    Meta path finder that delegates to another path finder and, for modules
    selected by :meth:`should_instrument`, replaces a source file loader with
    :class:`TypeguardLoader` so the module gets instrumented as if decorated
    with :func:`@typechecked <typeguard.typechecked>`.

    Should not be used directly, but rather via :func:`~.install_import_hook`.

    .. versionadded:: 2.6
    """

    def __init__(self, packages: list[str] | None, original_pathfinder: MetaPathFinder):
        self.packages = packages
        self._original_pathfinder = original_pathfinder

    def find_spec(
        self,
        fullname: str,
        path: Sequence[str] | None,
        target: types.ModuleType | None = None,
    ) -> ModuleSpec | None:
        """
        Return a spec with an instrumenting loader, or ``None`` when the module is
        not selected, not found, or not loaded from a source file.
        """
        if not self.should_instrument(fullname):
            return None

        found = self._original_pathfinder.find_spec(fullname, path, target)
        if found is None or not isinstance(found.loader, SourceFileLoader):
            return None

        found.loader = TypeguardLoader(found.loader.name, found.loader.path)
        return found

    def should_instrument(self, module_name: str) -> bool:
        """
        Determine whether the module with the given name should be instrumented.

        :param module_name: full name of the module that is about to be imported (e.g.
            ``xyz.abc``)
        """
        if self.packages is None:
            return True

        return any(
            module_name == candidate or module_name.startswith(candidate + ".")
            for candidate in self.packages
        )
class ImportHookManager:
    """
    Handle for an installed Typeguard import hook.

    Call :meth:`uninstall` to remove the hook, or use the handle in a ``with``
    statement to have it removed when the block exits.
    """

    def __init__(self, hook: MetaPathFinder):
        self.hook = hook

    def __enter__(self) -> None:
        return None

    def __exit__(
        self,
        exc_type: type[BaseException],
        exc_val: BaseException,
        exc_tb: TracebackType,
    ) -> None:
        self.uninstall()

    def uninstall(self) -> None:
        """Uninstall the import hook."""
        try:
            sys.meta_path.remove(self.hook)
        except ValueError:
            # The hook is not on sys.meta_path any more; nothing left to do
            return
def install_import_hook(
    packages: Iterable[str] | None = None,
    *,
    cls: type[TypeguardFinder] = TypeguardFinder,
) -> ImportHookManager:
    """
    Install an import hook that instruments functions for automatic type checking.

    This only affects modules loaded **after** this hook has been installed.

    :param packages: an iterable of package names to instrument, or ``None`` to
        instrument all packages
    :param cls: a custom meta path finder class
    :return: a context manager that uninstalls the hook on exit (or when you call
        ``.uninstall()``)
    :raises RuntimeError: if ``sys.meta_path`` contains no ``PathFinder`` class

    .. versionadded:: 2.6

    """
    target_packages: list[str] | None
    if packages is None:
        target_packages = None
    else:
        # A lone string is treated as a single package name, not as an iterable
        target_packages = [packages] if isinstance(packages, str) else list(packages)

    path_finder = next(
        (
            candidate
            for candidate in sys.meta_path
            if isclass(candidate)
            and candidate.__name__ == "PathFinder"
            and hasattr(candidate, "find_spec")
        ),
        None,
    )
    if path_finder is None:
        raise RuntimeError("Cannot find a PathFinder in sys.meta_path")

    hook = cls(target_packages, path_finder)
    # Put the hook first so it is consulted before the finders already installed
    sys.meta_path.insert(0, hook)
    return ImportHookManager(hook)
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_memo.py
================================================
from __future__ import annotations
from typing import Any
from metaflow._vendor.v3_7.typeguard._config import TypeCheckConfiguration, global_config
class TypeCheckMemo:
    """
    Bundle of the context that type checkers need while checking a value.

    .. attribute:: globals
       :type: dict[str, Any]

       Global variables consulted when resolving forward references.

    .. attribute:: locals
       :type: dict[str, Any]

       Local variables consulted when resolving forward references.

    .. attribute:: self_type
       :type: type | None

       For checks run inside an instance method or class method, the class object
       that the first argument (usually named ``self`` or ``cls``) refers to.

    .. attribute:: config
       :type: TypeCheckConfiguration

       The configuration applied to this particular set of type checking
       operations.
    """

    __slots__ = ("globals", "locals", "self_type", "config")

    def __init__(
        self,
        globals: dict[str, Any],
        locals: dict[str, Any],
        *,
        self_type: type | None = None,
        config: TypeCheckConfiguration = global_config,
    ):
        self.globals, self.locals = globals, locals
        self.self_type, self.config = self_type, config
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_pytest_plugin.py
================================================
from __future__ import annotations
import sys
import warnings
from pytest import Config, Parser
from metaflow._vendor.v3_7.typeguard._config import CollectionCheckStrategy, ForwardRefPolicy, global_config
from metaflow._vendor.v3_7.typeguard._exceptions import InstrumentationWarning
from metaflow._vendor.v3_7.typeguard._importhook import install_import_hook
from metaflow._vendor.v3_7.typeguard._utils import qualified_name, resolve_reference
def pytest_addoption(parser: Parser) -> None:
    """Register the ``--typeguard-*`` command line options in the "typeguard" group."""
    add_option = parser.getgroup("typeguard").addoption

    add_option(
        "--typeguard-packages",
        action="store",
        help="comma separated name list of packages and modules to instrument for "
        "type checking, or :all: to instrument all modules loaded after typeguard",
    )
    add_option(
        "--typeguard-debug-instrumentation",
        action="store_true",
        help="print all instrumented code to stderr",
    )
    add_option(
        "--typeguard-typecheck-fail-callback",
        action="store",
        help=(
            "a module:varname (e.g. typeguard:warn_on_error) reference to a function "
            "that is called (with the exception, and memo object as arguments) to "
            "handle a TypeCheckError"
        ),
    )

    # The two enum-backed options accept exactly the member names of their enum
    forward_ref_choices = list(ForwardRefPolicy.__members__)
    add_option(
        "--typeguard-forward-ref-policy",
        action="store",
        choices=forward_ref_choices,
        help=(
            "determines how to deal with unresolveable forward references in type "
            "annotations"
        ),
    )
    collection_check_choices = list(CollectionCheckStrategy.__members__)
    add_option(
        "--typeguard-collection-check-strategy",
        action="store",
        choices=collection_check_choices,
        help="determines how thoroughly to check collections (list, dict, etc)",
    )
def pytest_configure(config: Config) -> None:
    """
    Apply the ``--typeguard-*`` options: install the import hook for the requested
    packages and copy the remaining settings onto ``global_config``.

    :raises TypeError: if the fail callback reference resolves to a non-callable
    """
    packages_option = config.getoption("typeguard_packages")
    if packages_option:
        packages: list[str] | None
        if packages_option == ":all:":
            packages = None
        else:
            packages = [name.strip() for name in packages_option.split(",")]
            # Modules imported before the hook is installed cannot be instrumented
            already_imported_packages = sorted(
                [name for name in packages if name in sys.modules]
            )
            if already_imported_packages:
                warnings.warn(
                    f"typeguard cannot check these packages because they are already "
                    f"imported: {', '.join(already_imported_packages)}",
                    InstrumentationWarning,
                    stacklevel=1,
                )

        install_import_hook(packages=packages)

    if config.getoption("typeguard_debug_instrumentation"):
        global_config.debug_instrumentation = True

    fail_callback_option = config.getoption("typeguard_typecheck_fail_callback")
    if fail_callback_option:
        callback = resolve_reference(fail_callback_option)
        if not callable(callback):
            raise TypeError(
                f"{fail_callback_option} ({qualified_name(callback.__class__)}) is not "
                f"a callable"
            )

        global_config.typecheck_fail_callback = callback

    policy_name = config.getoption("typeguard_forward_ref_policy")
    if policy_name:
        global_config.forward_ref_policy = ForwardRefPolicy.__members__[policy_name]

    strategy_name = config.getoption("typeguard_collection_check_strategy")
    if strategy_name:
        global_config.collection_check_strategy = CollectionCheckStrategy.__members__[
            strategy_name
        ]
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_suppression.py
================================================
from __future__ import annotations
import sys
from collections.abc import Callable, Generator
from contextlib import contextmanager
from functools import update_wrapper
from threading import Lock
from typing import ContextManager, TypeVar, overload
if sys.version_info >= (3, 10):
from typing import ParamSpec
else:
from metaflow._vendor.v3_7.typing_extensions import ParamSpec
P = ParamSpec("P")
T = TypeVar("T")
# Number of currently active suppressions (context managers plus running
# decorated functions); only modified while holding the lock below
type_checks_suppressed = 0
type_checks_suppress_lock = Lock()


@overload
def suppress_type_checks(func: Callable[P, T]) -> Callable[P, T]:
    ...


@overload
def suppress_type_checks() -> ContextManager[None]:
    ...


def suppress_type_checks(
    func: Callable[P, T] | None = None
) -> Callable[P, T] | ContextManager[None]:
    """
    Temporarily suppress all type checking.

    The mode of operation depends on how the function is used:

    #. as a context manager (``with suppress_type_checks(): ...``)
    #. as a decorator (``@suppress_type_checks``)

    As a context manager, :func:`check_type` and any automatically instrumented
    functions skip the actual type checking for the duration of the block. Such
    blocks can be nested.

    As a decorator, all type checking is suppressed while the decorated function
    is running.

    Type checking resumes once no context managers are active and no decorated
    functions are running any more.

    Both operating modes are thread-safe.

    """

    @contextmanager
    def suppressed() -> Generator[None, None, None]:
        # Raise the counter on entry and always lower it again on exit
        global type_checks_suppressed

        with type_checks_suppress_lock:
            type_checks_suppressed += 1

        try:
            yield
        finally:
            with type_checks_suppress_lock:
                type_checks_suppressed -= 1

    if func is None:
        # Context manager mode
        return suppressed()

    # Decorator mode
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
        with suppressed():
            return func(*args, **kwargs)

    update_wrapper(wrapper, func)
    return wrapper
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_transformer.py
================================================
from __future__ import annotations
import ast
import builtins
import sys
import typing
from ast import (
AST,
Add,
AnnAssign,
Assign,
AsyncFunctionDef,
Attribute,
AugAssign,
BinOp,
BitAnd,
BitOr,
BitXor,
Call,
ClassDef,
Constant,
Dict,
Div,
Expr,
Expression,
FloorDiv,
FunctionDef,
If,
Import,
ImportFrom,
Index,
List,
Load,
LShift,
MatMult,
Mod,
Module,
Mult,
Name,
NodeTransformer,
NodeVisitor,
Pass,
Pow,
Return,
RShift,
Starred,
Store,
Str,
Sub,
Subscript,
Tuple,
Yield,
YieldFrom,
alias,
copy_location,
expr,
fix_missing_locations,
keyword,
walk,
)
from collections import defaultdict
from collections.abc import Generator, Sequence
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import dataclass, field
from typing import Any, ClassVar, cast, overload
if sys.version_info >= (3, 8):
from ast import NamedExpr
# Qualified names of return annotations from which yield/send/return types are
# extracted when the annotated function contains yield expressions
generator_names = (
    "typing.Generator",
    "collections.abc.Generator",
    "typing.Iterator",
    "collections.abc.Iterator",
    "typing.Iterable",
    "collections.abc.Iterable",
    "typing.AsyncIterator",
    "collections.abc.AsyncIterator",
    "typing.AsyncIterable",
    "collections.abc.AsyncIterable",
    "typing.AsyncGenerator",
    "collections.abc.AsyncGenerator",
)
# Qualified names recognized as "Any"
anytype_names = (
    "typing.Any",
    "typing_extensions.Any",
)
# Qualified names recognized as "Literal"
literal_names = (
    "typing.Literal",
    "typing_extensions.Literal",
)
# Qualified names recognized as "Annotated"
annotated_names = (
    "typing.Annotated",
    "typing_extensions.Annotated",
)
# Decorators that make the transformer leave a function uninstrumented
ignore_decorators = (
    "typing.no_type_check",
    "typeguard.typeguard_ignore",
)
# Maps binary operator node classes to the names of their in-place counterparts
aug_assign_functions = {
    Add: "iadd",
    Sub: "isub",
    Mult: "imul",
    MatMult: "imatmul",
    Div: "itruediv",
    FloorDiv: "ifloordiv",
    Mod: "imod",
    Pow: "ipow",
    LShift: "ilshift",
    RShift: "irshift",
    BitAnd: "iand",
    BitXor: "ixor",
    BitOr: "ior",
}
@dataclass
class TransformMemo:
    """
    Per-scope bookkeeping used while instrumenting a module, class or function.

    Memos form a chain through ``parent``; name lookups (:meth:`name_matches`,
    :meth:`is_ignored_name`, :meth:`get_unused_name`) walk that chain outwards.
    """

    node: Module | ClassDef | FunctionDef | AsyncFunctionDef | None
    parent: TransformMemo | None
    path: tuple[str, ...]
    joined_path: Constant = field(init=False)
    return_annotation: expr | None = None
    yield_annotation: expr | None = None
    send_annotation: expr | None = None
    is_async: bool = False
    local_names: set[str] = field(init=False, default_factory=set)
    imported_names: dict[str, str] = field(init=False, default_factory=dict)
    ignored_names: set[str] = field(init=False, default_factory=set)
    load_names: defaultdict[str, dict[str, Name]] = field(
        init=False, default_factory=lambda: defaultdict(dict)
    )
    has_yield_expressions: bool = field(init=False, default=False)
    has_return_expressions: bool = field(init=False, default=False)
    memo_var_name: Name | None = field(init=False, default=None)
    should_instrument: bool = field(init=False, default=True)
    variable_annotations: dict[str, expr] = field(init=False, default_factory=dict)
    configuration_overrides: dict[str, Any] = field(init=False, default_factory=dict)
    code_inject_index: int = field(init=False, default=0)

    def __post_init__(self) -> None:
        """Compute ``joined_path`` and the index where injected code belongs."""
        elements: list[str] = []
        memo = self
        while isinstance(memo.node, (ClassDef, FunctionDef, AsyncFunctionDef)):
            elements.insert(0, memo.node.name)
            if not memo.parent:
                break

            memo = memo.parent
            if isinstance(memo.node, (FunctionDef, AsyncFunctionDef)):
                # Mirror __qualname__: names defined inside a function are
                # separated from it by "<locals>" (e.g. "outer.<locals>.inner")
                elements.insert(0, "<locals>")

        self.joined_path = Constant(".".join(elements))

        # Figure out where to insert instrumentation code
        if self.node:
            for index, child in enumerate(self.node.body):
                if isinstance(child, ImportFrom) and child.module == "__future__":
                    # (module only) __future__ imports must come first
                    continue
                elif isinstance(child, Expr):
                    if isinstance(child.value, Constant) and isinstance(
                        child.value.value, str
                    ):
                        continue  # docstring
                    elif sys.version_info < (3, 8) and isinstance(child.value, Str):
                        continue  # docstring

                self.code_inject_index = index
                break

    def get_unused_name(self, name: str) -> str:
        """
        Reserve and return a name not yet used in this scope or any enclosing one.

        Underscores are appended to ``name`` until it is free; the result is
        recorded in this memo's ``local_names``.
        """
        memo: TransformMemo | None = self
        while memo is not None:
            if name in memo.local_names:
                # Clash: extend the name and restart the search from this scope
                memo = self
                name += "_"
            else:
                memo = memo.parent

        self.local_names.add(name)
        return name

    def is_ignored_name(self, expression: expr | Expr | None) -> bool:
        """
        Tell whether the leading name of ``expression`` is in the ``ignored_names``
        of this memo or of any parent. Only a bare name or an attribute directly
        on a name is considered; anything else yields ``False``.
        """
        top_expression = (
            expression.value if isinstance(expression, Expr) else expression
        )

        if isinstance(top_expression, Attribute) and isinstance(
            top_expression.value, Name
        ):
            name = top_expression.value.id
        elif isinstance(top_expression, Name):
            name = top_expression.id
        else:
            return False

        memo: TransformMemo | None = self
        while memo is not None:
            if name in memo.ignored_names:
                return True

            memo = memo.parent

        return False

    def get_memo_name(self) -> Name:
        """Return the ``Name`` node of the memo variable, creating it on first use."""
        if not self.memo_var_name:
            self.memo_var_name = Name(id="memo", ctx=Load())

        return self.memo_var_name

    def get_import(self, module: str, name: str) -> Name:
        """
        Return a ``Name`` node referring to ``module.name``.

        A previously registered import or an identical existing import is reused;
        otherwise a new import is registered under an unused alias.
        """
        if module in self.load_names and name in self.load_names[module]:
            return self.load_names[module][name]

        qualified_name = f"{module}.{name}"
        if name in self.imported_names and self.imported_names[name] == qualified_name:
            return Name(id=name, ctx=Load())

        alias = self.get_unused_name(name)
        node = self.load_names[module][name] = Name(id=alias, ctx=Load())
        self.imported_names[name] = qualified_name
        return node

    def insert_imports(self, node: Module | FunctionDef | AsyncFunctionDef) -> None:
        """Insert imports needed by injected code."""
        if not self.load_names:
            return

        # Insert imports after any "from __future__ ..." imports and any docstring
        for modulename, names in self.load_names.items():
            aliases = [
                alias(orig_name, new_name.id if orig_name != new_name.id else None)
                for orig_name, new_name in sorted(names.items())
            ]
            node.body.insert(self.code_inject_index, ImportFrom(modulename, aliases, 0))

    def name_matches(self, expression: expr | Expr | None, *names: str) -> bool:
        """
        Tell whether ``expression`` refers to one of the qualified ``names``.

        A subscript or call is reduced to its target first. The leading name is
        translated through ``imported_names`` (or prefixed with ``builtins.`` when
        it names a builtin) before comparing; parents are consulted on a miss.
        """
        if expression is None:
            return False

        path: list[str] = []
        top_expression = (
            expression.value if isinstance(expression, Expr) else expression
        )

        if isinstance(top_expression, Subscript):
            top_expression = top_expression.value
        elif isinstance(top_expression, Call):
            top_expression = top_expression.func

        while isinstance(top_expression, Attribute):
            path.insert(0, top_expression.attr)
            top_expression = top_expression.value

        if not isinstance(top_expression, Name):
            return False

        if top_expression.id in self.imported_names:
            translated = self.imported_names[top_expression.id]
        elif hasattr(builtins, top_expression.id):
            translated = "builtins." + top_expression.id
        else:
            translated = top_expression.id

        path.insert(0, translated)
        joined_path = ".".join(path)
        if joined_path in names:
            return True
        elif self.parent:
            return self.parent.name_matches(expression, *names)
        else:
            return False

    def get_config_keywords(self) -> list[keyword]:
        """
        Return the configuration overrides as ``keyword`` nodes. Overrides of a
        directly enclosing class come first and are superseded by this memo's own.
        """
        if self.parent and isinstance(self.parent.node, ClassDef):
            overrides = self.parent.configuration_overrides.copy()
        else:
            overrides = {}

        overrides.update(self.configuration_overrides)
        return [keyword(key, value) for key, value in overrides.items()]
class NameCollector(NodeVisitor):
    """
    Gathers the names bound by imports, plain-name assignment targets and
    assignment expressions, without descending into function or class
    definitions.
    """

    def __init__(self) -> None:
        self.names: set[str] = set()

    def _add_import_aliases(self, node: Import | ImportFrom) -> None:
        # An import binds the "as" name when present, otherwise the imported name
        self.names.update(item.asname or item.name for item in node.names)

    def visit_Import(self, node: Import) -> None:
        self._add_import_aliases(node)

    def visit_ImportFrom(self, node: ImportFrom) -> None:
        self._add_import_aliases(node)

    def visit_Assign(self, node: Assign) -> None:
        # Only plain name targets count; tuple/attribute/subscript targets do not
        self.names.update(
            target.id for target in node.targets if isinstance(target, Name)
        )

    def visit_NamedExpr(self, node: NamedExpr) -> Any:
        target = node.target
        if isinstance(target, Name):
            self.names.add(target.id)

    def visit_FunctionDef(self, node: FunctionDef) -> None:
        return None

    def visit_ClassDef(self, node: ClassDef) -> None:
        return None
class GeneratorDetector(NodeVisitor):
    """
    Detects if a function node is a generator function.

    After visiting the function node, ``contains_yields`` is ``True`` when a
    ``yield`` or ``yield from`` was found outside nested functions and classes.
    """

    contains_yields: bool = False
    in_root_function: bool = False

    def _mark_yield(self) -> None:
        self.contains_yields = True

    def visit_Yield(self, node: Yield) -> Any:
        self._mark_yield()

    def visit_YieldFrom(self, node: YieldFrom) -> Any:
        self._mark_yield()

    def visit_ClassDef(self, node: ClassDef) -> Any:
        return None

    def visit_FunctionDef(self, node: FunctionDef | AsyncFunctionDef) -> Any:
        # Only the first (root) function is traversed; nested ones are skipped
        if self.in_root_function:
            return

        self.in_root_function = True
        self.generic_visit(node)
        self.in_root_function = False

    def visit_AsyncFunctionDef(self, node: AsyncFunctionDef) -> Any:
        self.visit_FunctionDef(node)
class AnnotationTransformer(NodeTransformer):
    """
    Rewrites an annotation expression into the form used by injected checks.

    String annotations are parsed into expressions, PEP 604 unions and built-in
    generics are replaced with ``typing`` equivalents on older Python versions,
    and parts that refer to ignored names or to ``Any`` are erased. A visit
    method returning ``None`` means the visited node was erased.
    """

    type_substitutions: ClassVar[dict[str, tuple[str, str]]] = {
        "builtins.dict": ("typing", "Dict"),
        "builtins.list": ("typing", "List"),
        "builtins.tuple": ("typing", "Tuple"),
        "builtins.set": ("typing", "Set"),
        "builtins.frozenset": ("typing", "FrozenSet"),
    }

    def __init__(self, transformer: TypeguardTransformer):
        self.transformer = transformer
        self._memo = transformer._memo
        # Nesting depth of visit() calls; 0 again once the outermost call returns
        self._level = 0

    def visit(self, node: AST) -> Any:
        """Visit ``node``; return ``None`` if the whole annotation was erased."""
        self._level += 1
        new_node = super().visit(node)
        self._level -= 1

        if isinstance(new_node, Expression) and not hasattr(new_node, "body"):
            return None

        # Return None if this new node matches a variation of typing.Any
        if (
            self._level == 0
            and isinstance(new_node, expr)
            and self._memo.name_matches(new_node, *anytype_names)
        ):
            return None

        return new_node

    def generic_visit(self, node: AST) -> AST:
        # Literal[...] arguments are values, not types, so leave them untouched
        if isinstance(node, expr) and self._memo.name_matches(node, *literal_names):
            return node

        return super().generic_visit(node)

    def visit_BinOp(self, node: BinOp) -> Any:
        self.generic_visit(node)

        if isinstance(node.op, BitOr):
            # Return Any if either side is Any
            if self._memo.name_matches(node.left, *anytype_names):
                return node.left
            elif self._memo.name_matches(node.right, *anytype_names):
                return node.right

            # "X | Y" is rewritten to typing.Union[X, Y] before Python 3.10
            if sys.version_info < (3, 10):
                union_name = self.transformer._get_import("typing", "Union")
                return Subscript(
                    value=union_name,
                    slice=Index(
                        Tuple(elts=[node.left, node.right], ctx=Load()), ctx=Load()
                    ),
                    ctx=Load(),
                )

        return node

    def visit_Attribute(self, node: Attribute) -> Any:
        if self._memo.is_ignored_name(node):
            return None

        return node

    def visit_Subscript(self, node: Subscript) -> Any:
        if self._memo.is_ignored_name(node.value):
            return None

        # The subscript of typing(_extensions).Literal can be any arbitrary string, so
        # don't try to evaluate it as code
        if node.slice:
            if isinstance(node.slice, Index):
                # Python 3.7 and 3.8
                slice_value = node.slice.value  # type: ignore[attr-defined]
            else:
                slice_value = node.slice

            if isinstance(slice_value, Tuple):
                if self._memo.name_matches(node.value, *annotated_names):
                    # Only treat the first argument to typing.Annotated as a potential
                    # forward reference
                    items = cast(
                        typing.List[expr],
                        [self.generic_visit(slice_value.elts[0])]
                        + slice_value.elts[1:],
                    )
                else:
                    items = cast(
                        typing.List[expr],
                        [self.generic_visit(item) for item in slice_value.elts],
                    )

                # If this is a Union and any of the items is Any, erase the entire
                # annotation
                if self._memo.name_matches(node.value, "typing.Union") and any(
                    isinstance(item, expr)
                    and self._memo.name_matches(item, *anytype_names)
                    for item in items
                ):
                    return None

                # If all items in the subscript were Any, erase the subscript entirely
                if all(item is None for item in items):
                    return node.value

                for index, item in enumerate(items):
                    if item is None:
                        items[index] = self.transformer._get_import("typing", "Any")

                slice_value.elts = items
            else:
                self.generic_visit(node)

                # The transformer deletes the attribute of an erased child: the
                # "slice" of the subscript on Python 3.9+, the "value" of the
                # Index wrapper on older versions
                if sys.version_info >= (3, 9):
                    slice_erased = not hasattr(node, "slice")
                else:
                    slice_erased = not hasattr(node.slice, "value")

                # If the transformer erased the slice entirely, just return the node
                # value without the subscript (unless it's Optional, in which case
                # erase the node entirely). An Optional[...] whose argument is
                # still present must be kept so that it is checked.
                if slice_erased:
                    if self._memo.name_matches(node.value, "typing.Optional"):
                        return None

                    return node.value

        return node

    def visit_Name(self, node: Name) -> Any:
        if self._memo.is_ignored_name(node):
            return None

        # Built-in collection types are not subscriptable before Python 3.9
        if sys.version_info < (3, 9):
            for typename, substitute in self.type_substitutions.items():
                if self._memo.name_matches(node, typename):
                    new_node = self.transformer._get_import(*substitute)
                    return copy_location(new_node, node)

        return node

    def visit_Call(self, node: Call) -> Any:
        # Don't recurse into calls
        return node

    def visit_Constant(self, node: Constant) -> Any:
        # A string constant is a forward reference: parse and convert its contents
        if isinstance(node.value, str):
            expression = ast.parse(node.value, mode="eval")
            new_node = self.visit(expression)
            if new_node:
                return copy_location(new_node.body, node)
            else:
                return None

        return node

    def visit_Str(self, node: Str) -> Any:
        # Only used on Python 3.7
        expression = ast.parse(node.s, mode="eval")
        new_node = self.visit(expression)
        if new_node:
            return copy_location(new_node.body, node)
        else:
            return None
class TypeguardTransformer(NodeTransformer):
def __init__(
    self, target_path: Sequence[str] | None = None, target_lineno: int | None = None
) -> None:
    """
    :param target_path: path of names leading to the single function to
        instrument, or ``None`` (or empty) to instrument everything
    :param target_lineno: line number used to identify the target function node
    """
    if target_path:
        self._target_path = tuple(target_path)
    else:
        self._target_path = None

    # Transformation starts in module scope, so both memos are the same object
    module_memo = TransformMemo(None, None, ())
    self._module_memo = module_memo
    self._memo = module_memo
    self.names_used_in_annotations: set[str] = set()
    self.target_node: FunctionDef | AsyncFunctionDef | None = None
    self.target_lineno = target_lineno
@contextmanager
def _use_memo(
    self, node: ClassDef | FunctionDef | AsyncFunctionDef
) -> Generator[None, Any, None]:
    """
    Make a fresh memo for ``node`` the current memo for the duration of the
    ``with`` block.

    For functions that should be instrumented, the return annotation is
    converted up front; for generator functions annotated with one of
    ``generator_names`` it is split into yield, send and return annotations.
    The previous memo is restored on exit, including when the block raises.
    """
    new_memo = TransformMemo(node, self._memo, self._memo.path + (node.name,))
    if isinstance(node, (FunctionDef, AsyncFunctionDef)):
        new_memo.should_instrument = (
            self._target_path is None or new_memo.path == self._target_path
        )
        if new_memo.should_instrument:
            # Check if the function is a generator function
            detector = GeneratorDetector()
            detector.visit(node)

            # Extract yield, send and return types where possible from a subscripted
            # annotation like Generator[int, str, bool]
            return_annotation = deepcopy(node.returns)
            if detector.contains_yields and new_memo.name_matches(
                return_annotation, *generator_names
            ):
                if isinstance(return_annotation, Subscript):
                    annotation_slice = return_annotation.slice

                    # Python < 3.9
                    if isinstance(annotation_slice, Index):
                        annotation_slice = (
                            annotation_slice.value  # type: ignore[attr-defined]
                        )

                    if isinstance(annotation_slice, Tuple):
                        items = annotation_slice.elts
                    else:
                        items = [annotation_slice]

                    if len(items) > 0:
                        new_memo.yield_annotation = self._convert_annotation(
                            items[0]
                        )

                    if len(items) > 1:
                        new_memo.send_annotation = self._convert_annotation(
                            items[1]
                        )

                    if len(items) > 2:
                        new_memo.return_annotation = self._convert_annotation(
                            items[2]
                        )
            else:
                new_memo.return_annotation = self._convert_annotation(
                    return_annotation
                )

    if isinstance(node, AsyncFunctionDef):
        new_memo.is_async = True

    old_memo = self._memo
    self._memo = new_memo
    try:
        yield
    finally:
        # Restore the enclosing scope's memo even if the block raised
        self._memo = old_memo
def _get_import(self, module: str, name: str) -> Name:
memo = self._memo if self._target_path else self._module_memo
return memo.get_import(module, name)
@overload
def _convert_annotation(self, annotation: None) -> None:
    ...


@overload
def _convert_annotation(self, annotation: expr) -> expr:
    ...


def _convert_annotation(self, annotation: expr | None) -> expr | None:
    """
    Run ``annotation`` through :class:`AnnotationTransformer`.

    When the result is an expression, it receives the source location of
    ``annotation`` and every name it contains is recorded in
    ``names_used_in_annotations``. ``None`` is passed through unchanged.
    """
    if annotation is None:
        return None

    # Convert PEP 604 unions (x | y) and generic built-in collections where
    # necessary, and undo forward references
    converted = cast(expr, AnnotationTransformer(self).visit(annotation))
    if not isinstance(converted, expr):
        return converted

    converted = ast.copy_location(converted, annotation)

    # Store names used in the annotation
    for candidate in walk(converted):
        if isinstance(candidate, Name):
            self.names_used_in_annotations.add(candidate.id)

    return converted
def visit_Name(self, node: Name) -> Name:
    """Record the identifier in the current memo's ``local_names``; the node is kept."""
    identifier = node.id
    self._memo.local_names.add(identifier)
    return node
def visit_Module(self, node: Module) -> Module:
    """
    Transform the module body, then add the imports registered on the current
    memo and fill in any missing source locations.
    """
    self.generic_visit(node)
    self._memo.insert_imports(node)
    # fix_missing_locations() hands back the node it was given
    return fix_missing_locations(node)
def visit_Import(self, node: Import) -> Import:
    """
    Register every name bound by an ``import`` statement as a local name and map
    it to the imported module's name. The node is returned unchanged.
    """
    memo = self._memo
    for imported in node.names:
        bound_name = imported.asname or imported.name
        memo.local_names.add(bound_name)
        memo.imported_names[bound_name] = imported.name

    return node
def visit_ImportFrom(self, node: ImportFrom) -> ImportFrom:
    """
    Register every name bound by a ``from ... import`` statement as a local name
    and map it to ``<module>.<name>``. Star imports are skipped; the node is
    returned unchanged.
    """
    memo = self._memo
    for imported in node.names:
        if imported.name == "*":
            continue

        bound_name = imported.asname or imported.name
        memo.local_names.add(bound_name)
        memo.imported_names[bound_name] = f"{node.module}.{imported.name}"

    return node
def visit_ClassDef(self, node: ClassDef) -> ClassDef | None:
    """
    Transform a class body within its own memo.

    ``@typechecked`` decorators are stripped from the class and their keyword
    arguments stored as configuration overrides. When a target path is set, top
    level classes other than its first element are dropped (``None`` is returned).
    """
    self._memo.local_names.add(node.name)

    # Eliminate top level classes not belonging to the target path
    at_top_level = not self._memo.path
    if (
        self._target_path is not None
        and at_top_level
        and node.name != self._target_path[0]
    ):
        return None

    with self._use_memo(node):
        # Iterate over a snapshot because decorators are removed along the way
        for decorator in list(node.decorator_list):
            if not self._memo.name_matches(decorator, "typeguard.typechecked"):
                continue

            # Remove the decorator to prevent duplicate instrumentation
            node.decorator_list.remove(decorator)

            # Store any configuration overrides
            if isinstance(decorator, Call) and decorator.keywords:
                overrides = {kw.arg: kw.value for kw in decorator.keywords if kw.arg}
                self._memo.configuration_overrides.update(overrides)

        self.generic_visit(node)
        return node
def visit_FunctionDef(
    self, node: FunctionDef | AsyncFunctionDef
) -> FunctionDef | AsyncFunctionDef | None:
    """
    Injects type checks for function arguments, and for a return of None if the
    function is annotated to return something else than Any or None, and the body
    ends without an explicit "return".

    Returns ``None`` (dropping the function) for ``@overload`` definitions and for
    top level functions outside the target path; functions carrying one of the
    ``ignore_decorators`` are returned untouched when the whole module is being
    instrumented.

    """
    self._memo.local_names.add(node.name)

    # Eliminate top level functions not belonging to the target path
    if (
        self._target_path is not None
        and not self._memo.path
        and node.name != self._target_path[0]
    ):
        return None

    # Skip instrumentation if we're instrumenting the whole module and the function
    # contains either @no_type_check or @typeguard_ignore
    if self._target_path is None:
        for decorator in node.decorator_list:
            if self._memo.name_matches(decorator, *ignore_decorators):
                return node

    with self._use_memo(node):
        arg_annotations: dict[str, Any] = {}
        if self._target_path is None or self._memo.path == self._target_path:
            # Find line number we're supposed to match against
            if node.decorator_list:
                first_lineno = node.decorator_list[0].lineno
            else:
                first_lineno = node.lineno

            for decorator in node.decorator_list.copy():
                if self._memo.name_matches(decorator, "typing.overload"):
                    # Remove overloads entirely
                    return None
                elif self._memo.name_matches(decorator, "typeguard.typechecked"):
                    # Remove the decorator to prevent duplicate instrumentation
                    node.decorator_list.remove(decorator)

                    # Store any configuration overrides
                    if isinstance(decorator, Call) and decorator.keywords:
                        self._memo.configuration_overrides = {
                            kw.arg: kw.value for kw in decorator.keywords if kw.arg
                        }

            if self.target_lineno == first_lineno:
                assert self.target_node is None
                self.target_node = node
                if node.decorator_list and sys.version_info >= (3, 8):
                    self.target_lineno = node.decorator_list[0].lineno
                else:
                    self.target_lineno = node.lineno

            all_args = node.args.args + node.args.kwonlyargs
            if sys.version_info >= (3, 8):
                all_args.extend(node.args.posonlyargs)

            # Ensure that any type shadowed by the positional or keyword-only
            # argument names are ignored in this function
            for arg in all_args:
                self._memo.ignored_names.add(arg.arg)

            # Ensure that any type shadowed by the variable positional argument name
            # (e.g. "args" in *args) is ignored in this function
            if node.args.vararg:
                self._memo.ignored_names.add(node.args.vararg.arg)

            # Ensure that any type shadowed by the variable keyword argument name
            # (e.g. "kwargs" in **kwargs) is ignored in this function
            if node.args.kwarg:
                self._memo.ignored_names.add(node.args.kwarg.arg)

            # Annotations are converted on copies so that the annotations left in
            # the function signature itself are never rewritten in place
            for arg in all_args:
                annotation = self._convert_annotation(deepcopy(arg.annotation))
                if annotation:
                    arg_annotations[arg.arg] = annotation

            if node.args.vararg:
                annotation_ = self._convert_annotation(
                    deepcopy(node.args.vararg.annotation)
                )
                if annotation_:
                    if sys.version_info >= (3, 9):
                        container = Name("tuple", ctx=Load())
                    else:
                        container = self._get_import("typing", "Tuple")

                    # *args: X is checked as tuple[X, ...]
                    subscript_slice: Tuple | Index = Tuple(
                        [
                            annotation_,
                            Constant(Ellipsis),
                        ],
                        ctx=Load(),
                    )
                    if sys.version_info < (3, 9):
                        subscript_slice = Index(subscript_slice, ctx=Load())

                    arg_annotations[node.args.vararg.arg] = Subscript(
                        container, subscript_slice, ctx=Load()
                    )

            if node.args.kwarg:
                annotation_ = self._convert_annotation(
                    deepcopy(node.args.kwarg.annotation)
                )
                if annotation_:
                    if sys.version_info >= (3, 9):
                        container = Name("dict", ctx=Load())
                    else:
                        container = self._get_import("typing", "Dict")

                    # **kwargs: X is checked as dict[str, X]
                    subscript_slice = Tuple(
                        [
                            Name("str", ctx=Load()),
                            annotation_,
                        ],
                        ctx=Load(),
                    )
                    if sys.version_info < (3, 9):
                        subscript_slice = Index(subscript_slice, ctx=Load())

                    arg_annotations[node.args.kwarg.arg] = Subscript(
                        container, subscript_slice, ctx=Load()
                    )

            if arg_annotations:
                self._memo.variable_annotations.update(arg_annotations)

        self.generic_visit(node)

        if arg_annotations:
            annotations_dict = Dict(
                keys=[Constant(key) for key in arg_annotations.keys()],
                values=[
                    Tuple([Name(key, ctx=Load()), annotation], ctx=Load())
                    for key, annotation in arg_annotations.items()
                ],
            )
            func_name = self._get_import(
                "typeguard._functions", "check_argument_types"
            )
            args = [
                self._memo.joined_path,
                annotations_dict,
                self._memo.get_memo_name(),
            ]
            node.body.insert(
                self._memo.code_inject_index, Expr(Call(func_name, args, []))
            )

        # Add a checked "return None" to the end if there's no explicit return
        # Skip if the return annotation is None or Any
        if (
            self._memo.return_annotation
            and (not self._memo.is_async or not self._memo.has_yield_expressions)
            and not isinstance(node.body[-1], Return)
            and (
                not isinstance(self._memo.return_annotation, Constant)
                or self._memo.return_annotation.value is not None
            )
        ):
            func_name = self._get_import(
                "typeguard._functions", "check_return_type"
            )
            return_node = Return(
                Call(
                    func_name,
                    [
                        self._memo.joined_path,
                        Constant(None),
                        self._memo.return_annotation,
                        self._memo.get_memo_name(),
                    ],
                    [],
                )
            )

            # Replace a placeholder "pass" at the end
            if isinstance(node.body[-1], Pass):
                copy_location(return_node, node.body[-1])
                del node.body[-1]

            node.body.append(return_node)

        # Insert code to create the call memo, if it was ever needed for this
        # function
        if self._memo.memo_var_name:
            memo_kwargs: dict[str, Any] = {}
            if self._memo.parent and isinstance(self._memo.parent.node, ClassDef):
                for decorator in node.decorator_list:
                    if (
                        isinstance(decorator, Name)
                        and decorator.id == "staticmethod"
                    ):
                        break
                    elif (
                        isinstance(decorator, Name)
                        and decorator.id == "classmethod"
                    ):
                        memo_kwargs["self_type"] = Name(
                            id=node.args.args[0].arg, ctx=Load()
                        )
                        break
                else:
                    # Plain method: the first argument is the instance, except
                    # for __new__ where it is the class itself
                    if node.args.args:
                        if node.name == "__new__":
                            memo_kwargs["self_type"] = Name(
                                id=node.args.args[0].arg, ctx=Load()
                            )
                        else:
                            memo_kwargs["self_type"] = Attribute(
                                Name(id=node.args.args[0].arg, ctx=Load()),
                                "__class__",
                                ctx=Load(),
                            )

            config_keywords = self._memo.get_config_keywords()
            if config_keywords:
                memo_kwargs["config"] = Call(
                    self._get_import("dataclasses", "replace"),
                    [self._get_import("typeguard._config", "global_config")],
                    config_keywords,
                )

            self._memo.memo_var_name.id = self._memo.get_unused_name("memo")
            memo_store_name = Name(id=self._memo.memo_var_name.id, ctx=Store())
            globals_call = Call(Name(id="globals", ctx=Load()), [], [])
            locals_call = Call(Name(id="locals", ctx=Load()), [], [])
            memo_expr = Call(
                self._get_import("typeguard", "TypeCheckMemo"),
                [globals_call, locals_call],
                [keyword(key, value) for key, value in memo_kwargs.items()],
            )
            node.body.insert(
                self._memo.code_inject_index,
                Assign([memo_store_name], memo_expr),
            )

            self._memo.insert_imports(node)

            # Remove any placeholder "pass" at the end
            if isinstance(node.body[-1], Pass):
                del node.body[-1]

    return node
def visit_AsyncFunctionDef(
    self, node: AsyncFunctionDef
) -> FunctionDef | AsyncFunctionDef | None:
    """
    Handle an ``async def`` exactly like a regular function definition.

    The node is forwarded unchanged to :meth:`visit_FunctionDef` and that
    method's result is returned as-is.
    """
    transformed = self.visit_FunctionDef(node)
    return transformed
def visit_Return(self, node: Return) -> Return:
    """
    Wrap the value of a ``return`` statement in a ``check_return_type()`` call.

    Child nodes are visited first. The statement is rewritten only when the
    current memo carries a return annotation, instrumentation is enabled and
    the annotation is not an ignored name; otherwise the node is returned
    untouched. A bare ``return`` is checked as the constant ``None``.
    """
    self.generic_visit(node)

    memo = self._memo
    if not (
        memo.return_annotation
        and memo.should_instrument
        and not memo.is_ignored_name(memo.return_annotation)
    ):
        return node

    checker = self._get_import("typeguard._functions", "check_return_type")
    returned_value = node.value or Constant(None)
    check_args = [
        memo.joined_path,
        returned_value,
        memo.return_annotation,
        memo.get_memo_name(),
    ]
    instrumented = Return(Call(checker, check_args, []))
    # Keep the replacement statement at the original source position
    copy_location(instrumented, node)
    return instrumented
def visit_Yield(self, node: Yield) -> Yield | Call:
    """
    Instrument a ``yield`` expression.

    The memo is flagged as containing yield expressions, then child nodes are
    visited. If a usable yield annotation exists, the yielded value (or the
    constant ``None`` for a bare ``yield``) is wrapped in
    ``check_yield_type()``. If a usable send annotation exists, the whole yield
    expression is additionally wrapped in ``check_send_type()`` and that call
    node is returned instead of the yield node.
    """
    self._memo.has_yield_expressions = True
    self.generic_visit(node)
    memo = self._memo

    if (
        memo.yield_annotation
        and memo.should_instrument
        and not memo.is_ignored_name(memo.yield_annotation)
    ):
        yield_checker = self._get_import("typeguard._functions", "check_yield_type")
        yielded = node.value or Constant(None)
        yield_args = [
            memo.joined_path,
            yielded,
            memo.yield_annotation,
            memo.get_memo_name(),
        ]
        node.value = Call(yield_checker, yield_args, [])

    if not (
        memo.send_annotation
        and memo.should_instrument
        and not memo.is_ignored_name(memo.send_annotation)
    ):
        return node

    send_checker = self._get_import("typeguard._functions", "check_send_type")
    send_args = [
        memo.joined_path,
        node,
        memo.send_annotation,
        memo.get_memo_name(),
    ]
    wrapped = Call(send_checker, send_args, [])
    # The wrapping call takes over the source position of the yield expression
    copy_location(wrapped, node)
    return wrapped
def visit_AnnAssign(self, node: AnnAssign) -> Any:
    """
    Instrument an annotated assignment (``x: int = value``) in a function body.

    Only applies when the current memo node is a (possibly async) function and
    the target is a plain name. The name is added to the memo's ignored names,
    the converted annotation is remembered for later assignments to the same
    variable, and, if a value is being assigned, it is wrapped in
    ``check_variable_assignment()``.
    """
    self.generic_visit(node)

    inside_function = isinstance(self._memo.node, (FunctionDef, AsyncFunctionDef))
    if not (inside_function and node.annotation and isinstance(node.target, Name)):
        return node

    memo = self._memo
    var_name = node.target.id
    memo.ignored_names.add(var_name)

    # Convert a copy so the annotation left in the statement is not altered
    converted = self._convert_annotation(deepcopy(node.annotation))
    if not converted:
        return node

    memo.variable_annotations[var_name] = converted
    if node.value:
        checker = self._get_import(
            "typeguard._functions", "check_variable_assignment"
        )
        check_args = [
            node.value,
            Constant(var_name),
            converted,
            memo.get_memo_name(),
        ]
        node.value = Call(checker, check_args, [])

    return node
def visit_Assign(self, node: Assign) -> Any:
    """
    Instrument a plain assignment inside a function body.

    Every name (or starred name, recorded with a ``*`` prefix) among the
    assignment targets is added to the memo's ignored names. If at least one of
    them was annotated earlier in the function, the assigned value is wrapped
    in a check: ``check_variable_assignment()`` for a single name, otherwise
    ``check_multi_variable_assignment()`` with one ``{name: annotation}`` dict
    per target, where unannotated names are checked against ``typing.Any``.
    """
    self.generic_visit(node)

    # Only instrument function-local assignments
    if not isinstance(self._memo.node, (FunctionDef, AsyncFunctionDef)):
        return node

    memo = self._memo
    per_target: list[dict[Constant, expr | None]] = []
    needs_check = False
    for target in node.targets:
        elements: Sequence[expr]
        if isinstance(target, Name):
            elements = [target]
        elif isinstance(target, Tuple):
            elements = target.elts
        else:
            # Attribute/subscript targets are not local variables
            continue

        found: dict[Constant, expr | None] = {}
        for element in elements:
            star = ""
            if isinstance(element, Starred):
                element = element.value
                star = "*"

            if not isinstance(element, Name):
                continue

            memo.ignored_names.add(element.id)
            annotation = memo.variable_annotations.get(element.id)
            found[Constant(star + element.id)] = annotation or None
            if annotation:
                needs_check = True

        per_target.append(found)

    if not needs_check:
        return node

    # Replace missing annotations with typing.Any
    for mapping in per_target:
        for key, expression in mapping.items():
            if expression is None:
                mapping[key] = self._get_import("typing", "Any")

    if len(per_target) == 1 and len(per_target[0]) == 1:
        checker = self._get_import(
            "typeguard._functions", "check_variable_assignment"
        )
        [(varname_const, annotation)] = per_target[0].items()
        check_args = [
            node.value,
            varname_const,
            annotation,
            memo.get_memo_name(),
        ]
        node.value = Call(checker, check_args, [])
    elif per_target:
        checker = self._get_import(
            "typeguard._functions", "check_multi_variable_assignment"
        )
        dict_nodes = [
            Dict(keys=list(mapping), values=list(mapping.values()))
            for mapping in per_target
        ]
        targets_arg = List(dict_nodes, ctx=Load())
        node.value = Call(
            checker,
            [node.value, targets_arg, memo.get_memo_name()],
            [],
        )

    return node
def visit_NamedExpr(self, node: NamedExpr) -> Any:
    """
    Instrument an assignment expression (``a := foo()``) in a function body.

    The target name is always added to the memo's ignored names; the assigned
    value is wrapped in ``check_variable_assignment()`` only if the variable
    was annotated earlier in the function.
    """
    self.generic_visit(node)

    # Only instrument function-local assignments
    memo = self._memo
    is_local_name = isinstance(
        memo.node, (FunctionDef, AsyncFunctionDef)
    ) and isinstance(node.target, Name)
    if not is_local_name:
        return node

    var_name = node.target.id
    memo.ignored_names.add(var_name)

    # Nothing to check against if the variable was never annotated
    annotation = memo.variable_annotations.get(var_name)
    if annotation is None:
        return node

    checker = self._get_import("typeguard._functions", "check_variable_assignment")
    check_args = [
        node.value,
        Constant(var_name),
        annotation,
        memo.get_memo_name(),
    ]
    node.value = Call(checker, check_args, [])
    return node
def visit_AugAssign(self, node: AugAssign) -> Any:
    """
    Instrument an augmented assignment (``a += 1``) in a function body.

    When the target is a previously annotated local name and the operator is
    listed in ``aug_assign_functions``, the statement is replaced with a plain
    assignment of the form
    ``a = check_variable_assignment(operator.<func>(a, value), ...)``.
    In every other case the node is returned unchanged.
    """
    self.generic_visit(node)

    # Only instrument function-local assignments
    memo = self._memo
    if not (
        isinstance(memo.node, (FunctionDef, AsyncFunctionDef))
        and isinstance(node.target, Name)
    ):
        return node

    # Bail out if no matching annotation is found
    var_name = node.target.id
    annotation = memo.variable_annotations.get(var_name)
    if annotation is None:
        return node

    # Bail out if the operator is not found (newer Python version?)
    try:
        operator_func_name = aug_assign_functions[type(node.op)]
    except KeyError:
        return node

    operator_func = self._get_import("operator", operator_func_name)
    current_value = Name(var_name, ctx=Load())
    operator_call = Call(operator_func, [current_value, node.value], [])
    checker = self._get_import("typeguard._functions", "check_variable_assignment")
    check_args = [
        operator_call,
        Constant(var_name),
        annotation,
        memo.get_memo_name(),
    ]
    return Assign(targets=[node.target], value=Call(checker, check_args, []))
def visit_If(self, node: If) -> Any:
    """
    Visit an ``if`` statement, keeping it valid and honouring TYPE_CHECKING.

    After visiting the children, an emptied body is refilled with ``pass``.
    When the current memo is the module memo and the test is a bare name that
    matches ``typing.TYPE_CHECKING``, every name collected from the statement
    is added to the memo's ignored names so that it won't be type checked.
    """
    self.generic_visit(node)

    # Fix empty node body (caused by removal of classes/functions not on the
    # target path)
    if not node.body:
        node.body.append(Pass())

    is_module_type_checking_block = (
        self._memo is self._module_memo
        and isinstance(node.test, Name)
        and self._memo.name_matches(node.test, "typing.TYPE_CHECKING")
    )
    if is_module_type_checking_block:
        collector = NameCollector()
        collector.visit(node)
        self._memo.ignored_names.update(collector.names)

    return node
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_union_transformer.py
================================================
"""
Transforms lazily evaluated PEP 604 unions into typing.Unions, for compatibility with
Python versions older than 3.10.
"""
from __future__ import annotations
from ast import (
BinOp,
BitOr,
Index,
Load,
Name,
NodeTransformer,
Subscript,
fix_missing_locations,
parse,
)
from ast import Tuple as ASTTuple
from types import CodeType
from typing import Any, Dict, FrozenSet, List, Set, Tuple, Union
# Typing-module stand-ins for names that may appear in string annotations,
# keyed by the name as written in the annotation. evaluate_forwardref() in
# _utils.py merges this table into the evaluation globals on Python < 3.9 when
# evaluating a forward reference fails with NameError.
type_substitutions = {
    "dict": Dict,
    "list": List,
    "tuple": Tuple,
    "set": Set,
    "frozenset": FrozenSet,
    "Union": Union,
}
class UnionTransformer(NodeTransformer):
    """
    AST transformer that rewrites ``X | Y`` into ``Union[X, Y]``.

    :param union_name: the name node to subscript; defaults to a load of the
        bare name ``Union``
    """

    def __init__(self, union_name: Name | None = None):
        if union_name:
            self.union_name = union_name
        else:
            self.union_name = Name(id="Union", ctx=Load())

    def visit_BinOp(self, node: BinOp) -> Any:
        """Replace a bitwise-or expression with a subscript of the union name."""
        # Operands are transformed first, so chained unions nest correctly
        self.generic_visit(node)
        if not isinstance(node.op, BitOr):
            return node

        operands = ASTTuple(elts=[node.left, node.right], ctx=Load())
        return Subscript(
            value=self.union_name,
            slice=Index(operands, ctx=Load()),
            ctx=Load(),
        )
def compile_type_hint(hint: str) -> CodeType:
    """
    Compile a type hint given as a string into an ``eval``-mode code object.

    PEP 604 unions (``X | Y``) in the hint are rewritten to ``Union[X, Y]``
    before compilation, so the resulting code only needs ``Union`` to be
    resolvable when it is evaluated.

    :param hint: the source text of the type hint
    :return: a code object suitable for :func:`eval`
    :raises SyntaxError: if ``hint`` is not a valid expression
    """
    # "<string>" (rather than an empty file name) keeps SyntaxErrors and
    # tracebacks originating from the hint readable
    parsed = parse(hint, "<string>", "eval")
    UnionTransformer().visit(parsed)
    # Nodes created by the transformer have no line/column information yet
    fix_missing_locations(parsed)
    return compile(parsed, "<string>", "eval", flags=0)
================================================
FILE: metaflow/_vendor/v3_7/typeguard/_utils.py
================================================
from __future__ import annotations
import inspect
import sys
from importlib import import_module
from inspect import currentframe
from types import CodeType, FrameType, FunctionType
from typing import TYPE_CHECKING, Any, Callable, ForwardRef, Union, cast
from weakref import WeakValueDictionary
if TYPE_CHECKING:
from ._memo import TypeCheckMemo
if sys.version_info >= (3, 10):
from typing import get_args, get_origin
def evaluate_forwardref(forwardref: ForwardRef, memo: TypeCheckMemo) -> Any:
return forwardref._evaluate(memo.globals, memo.locals, frozenset())
else:
from metaflow._vendor.v3_7.typing_extensions import get_args, get_origin
evaluate_extra_args: tuple[frozenset[Any], ...] = (
(frozenset(),) if sys.version_info >= (3, 9) else ()
)
def evaluate_forwardref(forwardref: ForwardRef, memo: TypeCheckMemo) -> Any:
from ._union_transformer import compile_type_hint, type_substitutions
if not forwardref.__forward_evaluated__:
forwardref.__forward_code__ = compile_type_hint(forwardref.__forward_arg__)
try:
return forwardref._evaluate(memo.globals, memo.locals, *evaluate_extra_args)
except NameError:
if sys.version_info < (3, 10):
# Try again, with the type substitutions (list -> List etc.) in place
new_globals = memo.globals.copy()
new_globals.setdefault("Union", Union)
if sys.version_info < (3, 9):
new_globals.update(type_substitutions)
return forwardref._evaluate(
new_globals, memo.locals or new_globals, *evaluate_extra_args
)
raise
if sys.version_info >= (3, 8):
from typing import final
else:
from metaflow._vendor.v3_7.typing_extensions import final
# Weak-valued mapping from code objects to function objects: an entry
# disappears once its function is no longer referenced anywhere else.
# NOTE(review): nothing in this module reads or writes it; presumably it is
# populated by a sibling module -- confirm before changing.
_functions_map: WeakValueDictionary[CodeType, FunctionType] = WeakValueDictionary()
def get_type_name(type_: Any) -> str:
    """
    Build a human readable name for a type or type annotation.

    The base name is the first string found among the ``__name__``, ``_name``
    and ``__forward_arg__`` attributes, falling back to the ``_name`` of the
    origin type and finally to the class name of ``type_`` with underscores
    stripped from both ends. Type arguments are appended in square brackets
    (``repr()`` of each argument for ``Literal``, recursively formatted names
    otherwise), and the module name is prepended unless the module is
    ``typing``, ``typing_extensions`` or ``builtins``.

    :return: the formatted name, or ``"(unknown)"`` if no string name is found
    """
    direct_candidates = (
        getattr(type_, attrname, None)
        for attrname in ("__name__", "_name", "__forward_arg__")
    )
    # The generator is lazy, so attribute lookups stop at the first string
    name = next(
        (value for value in direct_candidates if isinstance(value, str)), None
    )
    if name is None:
        origin = get_origin(type_)
        fallback = getattr(origin, "_name", None)
        if fallback is None:
            fallback = type_.__class__.__name__.strip("_")

        if not isinstance(fallback, str):
            return "(unknown)"

        name = fallback

    args = get_args(type_)
    if args:
        if name == "Literal":
            rendered = (repr(arg) for arg in args)
        else:
            rendered = (get_type_name(arg) for arg in args)

        name = f"{name}[{', '.join(rendered)}]"

    module = getattr(type_, "__module__", None)
    if module and module not in (None, "typing", "typing_extensions", "builtins"):
        name = module + "." + name

    return name
def qualified_name(obj: Any, *, add_class_prefix: bool = False) -> str:
    """
    Return the qualified name (e.g. package.module.Type) for the given object.

    If ``obj`` is a class, its own name is used; for any other object the name
    of its type is used. ``None`` yields ``"None"``. Names from the ``typing``
    and ``builtins`` modules are returned without the module prefix.

    :param add_class_prefix: if ``True``, prefix the name with ``"class "`` when
        ``obj`` is itself a class
    """
    if obj is None:
        return "None"

    if inspect.isclass(obj):
        type_ = obj
        prefix = "class " if add_class_prefix else ""
    else:
        type_ = type(obj)
        prefix = ""

    module, qualname = type_.__module__, type_.__qualname__
    if module in ("typing", "builtins"):
        return prefix + qualname

    return prefix + f"{module}.{qualname}"
def function_name(func: Callable[..., Any]) -> str:
    """
    Return the qualified name of the given function.

    The name is ``<module>.<qualname>``; the module part is omitted for
    builtins and for callables that have no usable module name.

    :param func: any callable
    :return: the dotted name, using ``repr(func)`` in place of the qualified
        name when the callable has no ``__qualname__``
    """
    # __module__ may be missing *or* None (e.g. bound methods of builtin
    # types such as ``[].append``); treat both as "no module" instead of
    # failing on ``None + "."``.
    module = getattr(func, "__module__", None) or ""
    prefix = (module + ".") if module not in ("builtins", "") else ""

    # For partial functions and objects with __call__ defined, __qualname__ does
    # not exist
    return prefix + getattr(func, "__qualname__", repr(func))
def resolve_reference(reference: str) -> Any:
    """
    Look up an object from a ``module:varname`` reference.

    The part before the first colon is imported as a module; the part after it
    is a dotted attribute path that is followed from that module.

    :raises ValueError: if either the module or the variable part is empty
    """
    modulename, _, varname = reference.partition(":")
    if not (modulename and varname):
        raise ValueError(f"{reference!r} is not a module:varname reference")

    target = import_module(modulename)
    for attribute in varname.split("."):
        target = getattr(target, attribute)

    return target
def is_method_of(obj: object, cls: type) -> bool:
    """
    Tell whether ``obj`` is a plain function defined inside ``cls``.

    This holds when ``obj`` is a Python function from the same module as
    ``cls`` whose qualified name starts with the qualified name of ``cls``
    followed by a dot.
    """
    if not inspect.isfunction(obj):
        return False

    if obj.__module__ != cls.__module__:
        return False

    return obj.__qualname__.startswith(cls.__qualname__ + ".")
def get_stacklevel() -> int:
    """
    Count how many stack levels separate the caller from non-typeguard code.

    Starting at 1 for the direct caller, the level is increased for every
    consecutive frame that belongs to typeguard, so the result points at the
    first frame outside the library.

    :return: the computed stack level (at least 1)
    """
    # Frames count as typeguard's own if their module name starts with
    # "typeguard." or with the package this module actually lives in. The
    # latter covers vendored copies whose modules are imported under a longer
    # dotted path.
    own_package = __name__.rpartition(".")[0]
    prefixes = ("typeguard.",)
    if own_package:
        prefixes += (own_package + ".",)

    level = 1
    frame = cast(FrameType, currentframe()).f_back
    while frame and frame.f_globals.get("__name__", "").startswith(prefixes):
        level += 1
        frame = frame.f_back

    return level
@final
class Unset:
    """Type of the ``unset`` sentinel, which stands for "no value given"."""

    __slots__ = ()

    def __repr__(self) -> str:
        # A non-empty marker keeps the sentinel visible in messages and debug
        # output (an empty repr renders as nothing at all).
        return "<unset>"


# The sentinel instance created for this module
unset = Unset()
================================================
FILE: metaflow/_vendor/v3_7/typeguard/py.typed
================================================
================================================
FILE: metaflow/_vendor/v3_7/typeguard.LICENSE
================================================
This is the MIT license: http://www.opensource.org/licenses/mit-license.php
Copyright (c) Alex Grönholm
Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or
substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
================================================
FILE: metaflow/_vendor/v3_7/typing_extensions.LICENSE
================================================
A. HISTORY OF THE SOFTWARE
==========================
Python was created in the early 1990s by Guido van Rossum at Stichting
Mathematisch Centrum (CWI, see https://www.cwi.nl) in the Netherlands
as a successor of a language called ABC. Guido remains Python's
principal author, although it includes many contributions from others.
In 1995, Guido continued his work on Python at the Corporation for
National Research Initiatives (CNRI, see https://www.cnri.reston.va.us)
in Reston, Virginia where he released several versions of the
software.
In May 2000, Guido and the Python core development team moved to
BeOpen.com to form the BeOpen PythonLabs team. In October of the same
year, the PythonLabs team moved to Digital Creations, which became
Zope Corporation. In 2001, the Python Software Foundation (PSF, see
https://www.python.org/psf/) was formed, a non-profit organization
created specifically to own Python-related Intellectual Property.
Zope Corporation was a sponsoring member of the PSF.
All Python releases are Open Source (see https://opensource.org for
the Open Source Definition). Historically, most, but not all, Python
releases have also been GPL-compatible; the table below summarizes
the various releases.
Release Derived Year Owner GPL-
from compatible? (1)
0.9.0 thru 1.2 1991-1995 CWI yes
1.3 thru 1.5.2 1.2 1995-1999 CNRI yes
1.6 1.5.2 2000 CNRI no
2.0 1.6 2000 BeOpen.com no
1.6.1 1.6 2001 CNRI yes (2)
2.1 2.0+1.6.1 2001 PSF no
2.0.1 2.0+1.6.1 2001 PSF yes
2.1.1 2.1+2.0.1 2001 PSF yes
2.1.2 2.1.1 2002 PSF yes
2.1.3 2.1.2 2002 PSF yes
2.2 and above 2.1.1 2001-now PSF yes
Footnotes:
(1) GPL-compatible doesn't mean that we're distributing Python under
the GPL. All Python licenses, unlike the GPL, let you distribute
a modified version without making your changes open source. The
GPL-compatible licenses make it possible to combine Python with
other software that is released under the GPL; the others don't.
(2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
because its license has a choice of law clause. According to
CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
is "not incompatible" with the GPL.
Thanks to the many outside volunteers who have worked under Guido's
direction to make these releases possible.
B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
===============================================================
Python software and documentation are licensed under the
Python Software Foundation License Version 2.
Starting with Python 3.8.6, examples, recipes, and other code in
the documentation are dual licensed under the PSF License Version 2
and the Zero-Clause BSD license.
Some software incorporated into Python is under different licenses.
The licenses are listed with code falling under that license.
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
--------------------------------------------
1. This LICENSE AGREEMENT is between the Python Software Foundation
("PSF"), and the Individual or Organization ("Licensee") accessing and
otherwise using this software ("Python") in source or binary form and
its associated documentation.
2. Subject to the terms and conditions of this License Agreement, PSF hereby
grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
analyze, test, perform and/or display publicly, prepare derivative works,
distribute, and otherwise use Python alone or in any derivative version,
provided, however, that PSF's License Agreement and PSF's notice of copyright,
i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation;
All Rights Reserved" are retained in Python alone or in any derivative version
prepared by Licensee.
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python.
4. PSF is making Python available to Licensee on an "AS IS"
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between PSF and
Licensee. This License Agreement does not grant permission to use PSF
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
-------------------------------------------
BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
Individual or Organization ("Licensee") accessing and otherwise using
this software in source or binary form and its associated
documentation ("the Software").
2. Subject to the terms and conditions of this BeOpen Python License
Agreement, BeOpen hereby grants Licensee a non-exclusive,
royalty-free, world-wide license to reproduce, analyze, test, perform
and/or display publicly, prepare derivative works, distribute, and
otherwise use the Software alone or in any derivative version,
provided, however, that the BeOpen Python License is retained in the
Software, alone or in any derivative version prepared by Licensee.
3. BeOpen is making the Software available to Licensee on an "AS IS"
basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
5. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
6. This License Agreement shall be governed by and interpreted in all
respects by the law of the State of California, excluding conflict of
law provisions. Nothing in this License Agreement shall be deemed to
create any relationship of agency, partnership, or joint venture
between BeOpen and Licensee. This License Agreement does not grant
permission to use BeOpen trademarks or trade names in a trademark
sense to endorse or promote products or services of Licensee, or any
third party. As an exception, the "BeOpen Python" logos available at
http://www.pythonlabs.com/logos.html may be used according to the
permissions granted on that web page.
7. By copying, installing or otherwise using the software, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
---------------------------------------
1. This LICENSE AGREEMENT is between the Corporation for National
Research Initiatives, having an office at 1895 Preston White Drive,
Reston, VA 20191 ("CNRI"), and the Individual or Organization
("Licensee") accessing and otherwise using Python 1.6.1 software in
source or binary form and its associated documentation.
2. Subject to the terms and conditions of this License Agreement, CNRI
hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use Python 1.6.1
alone or in any derivative version, provided, however, that CNRI's
License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
1995-2001 Corporation for National Research Initiatives; All Rights
Reserved" are retained in Python 1.6.1 alone or in any derivative
version prepared by Licensee. Alternately, in lieu of CNRI's License
Agreement, Licensee may substitute the following text (omitting the
quotes): "Python 1.6.1 is made available subject to the terms and
conditions in CNRI's License Agreement. This Agreement together with
Python 1.6.1 may be located on the internet using the following
unique, persistent identifier (known as a handle): 1895.22/1013. This
Agreement may also be obtained from a proxy server on the internet
using the following URL: http://hdl.handle.net/1895.22/1013".
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python 1.6.1 or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python 1.6.1.
4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. This License Agreement shall be governed by the federal
intellectual property law of the United States, including without
limitation the federal copyright law, and, to the extent such
U.S. federal law does not apply, by the law of the Commonwealth of
Virginia, excluding Virginia's conflict of law provisions.
Notwithstanding the foregoing, with regard to derivative works based
on Python 1.6.1 that incorporate non-separable material that was
previously distributed under the GNU General Public License (GPL), the
law of the Commonwealth of Virginia shall govern this License
Agreement only as to issues arising under or with respect to
Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
License Agreement shall be deemed to create any relationship of
agency, partnership, or joint venture between CNRI and Licensee. This
License Agreement does not grant permission to use CNRI trademarks or
trade name in a trademark sense to endorse or promote products or
services of Licensee, or any third party.
8. By clicking on the "ACCEPT" button where indicated, or by copying,
installing or otherwise using Python 1.6.1, Licensee agrees to be
bound by the terms and conditions of this License Agreement.
ACCEPT
CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
--------------------------------------------------
Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
The Netherlands. All rights reserved.
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.
STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
ZERO-CLAUSE BSD LICENSE FOR CODE IN THE PYTHON DOCUMENTATION
----------------------------------------------------------------------
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
================================================
FILE: metaflow/_vendor/v3_7/typing_extensions.py
================================================
import abc
import collections
import collections.abc
import functools
import inspect
import operator
import sys
import types as _types
import typing
import warnings
__all__ = [
# Super-special typing primitives.
'Any',
'ClassVar',
'Concatenate',
'Final',
'LiteralString',
'ParamSpec',
'ParamSpecArgs',
'ParamSpecKwargs',
'Self',
'Type',
'TypeVar',
'TypeVarTuple',
'Unpack',
# ABCs (from collections.abc).
'Awaitable',
'AsyncIterator',
'AsyncIterable',
'Coroutine',
'AsyncGenerator',
'AsyncContextManager',
'Buffer',
'ChainMap',
# Concrete collection types.
'ContextManager',
'Counter',
'Deque',
'DefaultDict',
'NamedTuple',
'OrderedDict',
'TypedDict',
# Structural checks, a.k.a. protocols.
'SupportsAbs',
'SupportsBytes',
'SupportsComplex',
'SupportsFloat',
'SupportsIndex',
'SupportsInt',
'SupportsRound',
# One-off things.
'Annotated',
'assert_never',
'assert_type',
'clear_overloads',
'dataclass_transform',
'deprecated',
'get_overloads',
'final',
'get_args',
'get_origin',
'get_original_bases',
'get_protocol_members',
'get_type_hints',
'IntVar',
'is_protocol',
'is_typeddict',
'Literal',
'NewType',
'overload',
'override',
'Protocol',
'reveal_type',
'runtime',
'runtime_checkable',
'Text',
'TypeAlias',
'TypeAliasType',
'TypeGuard',
'TYPE_CHECKING',
'Never',
'NoReturn',
'Required',
'NotRequired',
# Pure aliases, have always been in typing
'AbstractSet',
'AnyStr',
'BinaryIO',
'Callable',
'Collection',
'Container',
'Dict',
'ForwardRef',
'FrozenSet',
'Generator',
'Generic',
'Hashable',
'IO',
'ItemsView',
'Iterable',
'Iterator',
'KeysView',
'List',
'Mapping',
'MappingView',
'Match',
'MutableMapping',
'MutableSequence',
'MutableSet',
'Optional',
'Pattern',
'Reversible',
'Sequence',
'Set',
'Sized',
'TextIO',
'Tuple',
'Union',
'ValuesView',
'cast',
'no_type_check',
'no_type_check_decorator',
]
# for backward compatibility
PEP_560 = True
GenericMeta = type
# The functions below are modified copies of typing internal helpers.
# They are needed by _ProtocolMeta and they provide support for PEP 646.
class _Sentinel:
def __repr__(self):
return ""
_marker = _Sentinel()
def _check_generic(cls, parameters, elen=_marker):
    """Validate the number of type arguments given to a generic class.

    ``elen`` is the expected count; when omitted it is taken from
    ``cls.__parameters__``. A mismatch is tolerated if the class's non-unpacked
    parameters include at least one TypeVarTuple and enough arguments were
    given for the remaining parameters.

    Raises TypeError if ``cls`` is not generic or the count does not match.
    """
    if not elen:
        raise TypeError(f"{cls} is not a generic class")

    if elen is _marker:
        declared = getattr(cls, "__parameters__", None)
        if not declared:
            raise TypeError(f"{cls} is not a generic class")
        elen = len(declared)

    alen = len(parameters)
    if alen == elen:
        return

    if hasattr(cls, "__parameters__"):
        plain_params = [p for p in cls.__parameters__ if not _is_unpack(p)]
        tv_tuple_count = sum(isinstance(p, TypeVarTuple) for p in plain_params)
        # A TypeVarTuple can absorb any number of arguments
        if tv_tuple_count > 0 and alen >= elen - tv_tuple_count:
            return

    raise TypeError(f"Too {'many' if alen > elen else 'few'} parameters for {cls};"
                    f" actual {alen}, expected {elen}")
if sys.version_info >= (3, 10):
def _should_collect_from_parameters(t):
return isinstance(
t, (typing._GenericAlias, _types.GenericAlias, _types.UnionType)
)
elif sys.version_info >= (3, 9):
def _should_collect_from_parameters(t):
return isinstance(t, (typing._GenericAlias, _types.GenericAlias))
else:
def _should_collect_from_parameters(t):
return isinstance(t, typing._GenericAlias) and not t._special
def _collect_type_vars(types, typevar_types=None):
    """Collect the type variables found in ``types``, in order of first
    appearance and without duplicates. For example::

        _collect_type_vars((T, Dict[S, T])) == (T, S)

    ``typevar_types`` is the class (or tuple of classes) that counts as a type
    variable; it defaults to ``typing.TypeVar``. Unpacked items are not
    collected directly, but the ``__parameters__`` of generic aliases are.
    """
    if typevar_types is None:
        typevar_types = typing.TypeVar

    collected = []
    for candidate in types:
        is_new_typevar = (
            isinstance(candidate, typevar_types)
            and candidate not in collected
            and not _is_unpack(candidate)
        )
        if is_new_typevar:
            collected.append(candidate)

        if _should_collect_from_parameters(candidate):
            unseen = [p for p in candidate.__parameters__ if p not in collected]
            collected.extend(unseen)

    return tuple(collected)
NoReturn = typing.NoReturn
# Some unconstrained type variables. These are used by the container types.
# (These are not for export.)
T = typing.TypeVar('T') # Any type.
KT = typing.TypeVar('KT') # Key type.
VT = typing.TypeVar('VT') # Value type.
T_co = typing.TypeVar('T_co', covariant=True) # Any type covariant containers.
T_contra = typing.TypeVar('T_contra', contravariant=True) # Ditto contravariant.
if sys.version_info >= (3, 11):
    from typing import Any
else:

    class _AnyMeta(type):
        """Metaclass giving ``Any`` its isinstance() guard and its repr."""

        def __instancecheck__(self, obj):
            # Subclasses of Any keep the ordinary isinstance() behaviour
            if self is not Any:
                return super().__instancecheck__(obj)
            raise TypeError("typing_extensions.Any cannot be used with isinstance()")

        def __repr__(self):
            if self is not Any:
                return super().__repr__()
            return "typing_extensions.Any"

    class Any(metaclass=_AnyMeta):
        """Special type indicating an unconstrained type.

        - Any is compatible with every type.
        - Any is assumed to have all methods.
        - All values are assumed to be instances of Any.

        These statements hold from the point of view of static type checkers.
        At runtime, Any should not be used with instance checks, and Any
        itself cannot be instantiated (its subclasses can).
        """

        def __new__(cls, *args, **kwargs):
            if cls is not Any:
                return super().__new__(cls, *args, **kwargs)
            raise TypeError("Any cannot be instantiated")
ClassVar = typing.ClassVar
class _ExtensionsSpecialForm(typing._SpecialForm, _root=True):
def __repr__(self):
return 'typing_extensions.' + self._name
# On older versions of typing there is an internal class named "Final".
# 3.8+: use the standard-library object directly.
if hasattr(typing, 'Final') and sys.version_info[:2] >= (3, 7):
    Final = typing.Final
# 3.7: provide our own special form.
else:
    class _FinalForm(_ExtensionsSpecialForm, _root=True):
        """Special form implementing ``Final[...]`` subscription."""

        def __getitem__(self, parameters):
            error_text = f'{self._name} accepts only a single type.'
            checked = typing._type_check(parameters, error_text)
            return typing._GenericAlias(self, (checked,))

    Final = _FinalForm('Final',
                       doc="""A special typing construct to indicate that a name
cannot be re-assigned or overridden in a subclass.
For example:
MAX_SIZE: Final = 9000
MAX_SIZE += 1 # Error reported by type checker
class Connection:
TIMEOUT: Final[int] = 10
class FastConnector(Connection):
TIMEOUT = 1 # Error reported by type checker
There is no runtime checking of these properties.""")
if sys.version_info >= (3, 11):
    final = typing.final
else:
    # @final exists in 3.8+, but we backport it for all versions
    # before 3.11 to keep support for the __final__ attribute.
    # See https://bugs.python.org/issue46342
    def final(f):
        """Mark a method as non-overridable or a class as non-subclassable.

        The marker is aimed at static type checkers.  For example::

            class Base:
                @final
                def done(self) -> None:
                    ...
            class Sub(Base):
                def done(self) -> None:  # Error reported by type checker
                    ...

            @final
            class Leaf:
                ...
            class Other(Leaf):  # Error reported by type checker
                ...

        Nothing is enforced at runtime.  The decorator tries to set
        ``__final__ = True`` on the decorated object so the marker can be
        introspected, and returns the object unchanged.
        """
        try:
            setattr(f, "__final__", True)
        except (AttributeError, TypeError):
            # The attribute is not writable: AttributeError for objects with
            # __slots__ or a read-only property, TypeError for builtin classes.
            # Leaving the marker off is deliberate.
            pass
        return f
def IntVar(name):
    """Return a new ``typing.TypeVar`` called *name*, with no bounds or constraints."""
    type_var = typing.TypeVar(name)
    return type_var
# A Literal bug was fixed in 3.11.0, 3.10.1 and 3.9.8
if sys.version_info >= (3, 10, 1):
Literal = typing.Literal
else:
def _flatten_literal_params(parameters):
"""An internal helper for Literal creation: flatten Literals among parameters"""
params = []
for p in parameters:
if isinstance(p, _LiteralGenericAlias):
params.extend(p.__args__)
else:
params.append(p)
return tuple(params)
def _value_and_type_iter(params):
for p in params:
yield p, type(p)
class _LiteralGenericAlias(typing._GenericAlias, _root=True):
def __eq__(self, other):
if not isinstance(other, _LiteralGenericAlias):
return NotImplemented
these_args_deduped = set(_value_and_type_iter(self.__args__))
other_args_deduped = set(_value_and_type_iter(other.__args__))
return these_args_deduped == other_args_deduped
def __hash__(self):
return hash(frozenset(_value_and_type_iter(self.__args__)))
class _LiteralForm(_ExtensionsSpecialForm, _root=True):
def __init__(self, doc: str):
self._name = 'Literal'
self._doc = self.__doc__ = doc
def __getitem__(self, parameters):
if not isinstance(parameters, tuple):
parameters = (parameters,)
parameters = _flatten_literal_params(parameters)
val_type_pairs = list(_value_and_type_iter(parameters))
try:
deduped_pairs = set(val_type_pairs)
except TypeError:
# unhashable parameters
pass
else:
# similar logic to typing._deduplicate on Python 3.9+
if len(deduped_pairs) < len(val_type_pairs):
new_parameters = []
for pair in val_type_pairs:
if pair in deduped_pairs:
new_parameters.append(pair[0])
deduped_pairs.remove(pair)
assert not deduped_pairs, deduped_pairs
parameters = tuple(new_parameters)
return _LiteralGenericAlias(self, parameters)
Literal = _LiteralForm(doc="""\
A type that can be used to indicate to type checkers
that the corresponding value has a value literally equivalent
to the provided parameter. For example:
var: Literal[4] = 4
The type checker understands that 'var' is literally equal to
the value 4 and no other value.
Literal[...] cannot be subclassed. There is no runtime
checking verifying that the parameter is actually a value
instead of a type.""")
_overload_dummy = typing._overload_dummy

if hasattr(typing, "get_overloads"):  # 3.11+
    overload = typing.overload
    get_overloads = typing.get_overloads
    clear_overloads = typing.clear_overloads
else:
    # {module: {qualname: {firstlineno: func}}}
    _overload_registry = collections.defaultdict(
        functools.partial(collections.defaultdict, dict)
    )

    def overload(func):
        """Decorator for overloaded functions/methods.

        In a stub file, write two or more stub definitions for the same
        function one after another, each decorated with @overload::

            @overload
            def utf8(value: None) -> None: ...
            @overload
            def utf8(value: bytes) -> bytes: ...
            @overload
            def utf8(value: str) -> bytes: ...

        In a regular .py file do the same, then add an implementation that
        is *not* decorated with @overload::

            @overload
            def utf8(value: None) -> None: ...
            @overload
            def utf8(value: bytes) -> bytes: ...
            @overload
            def utf8(value: str) -> bytes: ...
            def utf8(value):
                # implementation goes here

        Each decorated definition is recorded and can be retrieved at
        runtime with get_overloads().
        """
        # Unwrap classmethod and staticmethod objects.
        target = getattr(func, "__func__", func)
        try:
            by_line = _overload_registry[target.__module__][target.__qualname__]
            by_line[target.__code__.co_firstlineno] = func
        except AttributeError:
            # Not a normal function; ignore.
            pass
        return _overload_dummy

    def get_overloads(func):
        """Return all defined overloads for *func* as a sequence."""
        # Unwrap classmethod and staticmethod objects.
        target = getattr(func, "__func__", func)
        per_module = _overload_registry.get(target.__module__)
        if per_module is None:
            return []
        per_name = per_module.get(target.__qualname__)
        if per_name is None:
            return []
        return list(per_name.values())

    def clear_overloads():
        """Clear all overloads in the registry."""
        _overload_registry.clear()
# This is not a real generic class.  Don't use outside annotations.
Type = typing.Type

# Various ABCs mimicking those in collections.abc.
# A few are simply re-exported for completeness.
# Every name in this run is bound to the attribute of the same name on
# ``typing``; the only exception is the OrderedDict fallback below.

Awaitable = typing.Awaitable
Coroutine = typing.Coroutine
AsyncIterable = typing.AsyncIterable
AsyncIterator = typing.AsyncIterator
Deque = typing.Deque
ContextManager = typing.ContextManager
AsyncContextManager = typing.AsyncContextManager
DefaultDict = typing.DefaultDict

# 3.7.2+: typing already provides OrderedDict.
if hasattr(typing, 'OrderedDict'):
    OrderedDict = typing.OrderedDict
# 3.7.0-3.7.2: build the alias ourselves, parameterised by the module's
# KT/VT type variables.
else:
    OrderedDict = typing._alias(collections.OrderedDict, (KT, VT))

Counter = typing.Counter
ChainMap = typing.ChainMap
AsyncGenerator = typing.AsyncGenerator
Text = typing.Text
TYPE_CHECKING = typing.TYPE_CHECKING
# Maps a module name to the names of classes in that module which a protocol
# class is allowed to inherit from (consulted by _ProtocolMeta.__new__).
_PROTO_ALLOWLIST = {
    'collections.abc': [
        'Callable', 'Awaitable', 'Iterable', 'Iterator', 'AsyncIterable',
        'Hashable', 'Sized', 'Container', 'Collection', 'Reversible', 'Buffer',
    ],
    'contextlib': ['AbstractContextManager', 'AbstractAsyncContextManager'],
    'typing_extensions': ['Buffer'],
}


# Attribute names that _get_protocol_attrs() never reports as protocol members.
_EXCLUDED_ATTRS = {
    "__abstractmethods__", "__annotations__", "__weakref__", "_is_protocol",
    "_is_runtime_protocol", "__dict__", "__slots__", "__parameters__",
    "__orig_bases__", "__module__", "_MutableMapping__marker", "__doc__",
    "__subclasshook__", "__orig_class__", "__init__", "__new__",
    "__protocol_attrs__", "__callable_proto_members_only__",
}

# Extra names that are excluded only on particular interpreter versions.
if sys.version_info < (3, 8):
    _EXCLUDED_ATTRS |= {
        "_gorg", "__next_in_mro__", "__extra__", "__tree_hash__", "__args__",
        "__origin__"
    }

if sys.version_info >= (3, 9):
    _EXCLUDED_ATTRS.add("__class_getitem__")

if sys.version_info >= (3, 12):
    _EXCLUDED_ATTRS.add("__type_params__")

# Freeze the set once every version-specific entry has been added.
_EXCLUDED_ATTRS = frozenset(_EXCLUDED_ATTRS)
def _get_protocol_attrs(cls):
    """Return the set of member names that make up the protocol *cls*.

    Every class in the MRO except the last one (``object``) is inspected,
    skipping classes named ``Protocol`` or ``Generic``.  Names come from the
    class ``__dict__`` and its ``__annotations__``; names starting with
    ``_abc_`` and names listed in ``_EXCLUDED_ATTRS`` are left out.
    """
    members = set()
    for klass in cls.__mro__[:-1]:  # without object
        if klass.__name__ in {'Protocol', 'Generic'}:
            continue
        annotated = getattr(klass, '__annotations__', {})
        members.update(
            name
            for name in (*klass.__dict__, *annotated)
            if not name.startswith('_abc_') and name not in _EXCLUDED_ATTRS
        )
    return members
def _maybe_adjust_parameters(cls):
    """Compute and store ``cls.__parameters__`` from the class's original bases.

    Helper function used in Protocol.__init_subclass__ and _TypedDictMeta.__new__.

    The contents of this function are very similar
    to logic found in typing.Generic.__init_subclass__
    on the CPython main branch.
    """
    tvars = []
    if '__orig_bases__' in cls.__dict__:
        tvars = _collect_type_vars(cls.__orig_bases__)
        # Look for Generic[T1, ..., Tn] or Protocol[T1, ..., Tn].
        # If found, tvars must be a subset of it.
        # If not found, tvars is it.
        # Also check for and reject plain Generic,
        # and reject multiple Generic[...] and/or Protocol[...].
        gvars = None
        for base in cls.__orig_bases__:
            if not isinstance(base, typing._GenericAlias):
                continue
            if base.__origin__ not in (typing.Generic, Protocol):
                continue
            # Remembered for the error message further down.
            the_base = base.__origin__.__name__
            if gvars is not None:
                raise TypeError(
                    "Cannot inherit from Generic[...]"
                    " and/or Protocol[...] multiple types.")
            gvars = base.__parameters__

        if gvars is None:
            gvars = tvars
        else:
            gvarset = set(gvars)
            if not set(tvars) <= gvarset:
                s_vars = ', '.join(str(t) for t in tvars if t not in gvarset)
                s_args = ', '.join(str(g) for g in gvars)
                raise TypeError(f"Some type variables ({s_vars}) are"
                                f" not listed in {the_base}[{s_args}]")
            tvars = gvars

    cls.__parameters__ = tuple(tvars)
def _caller(depth=2):
try:
return sys._getframe(depth).f_globals.get('__name__', '__main__')
except (AttributeError, ValueError): # For platforms without _getframe()
return None
# The performance of runtime-checkable protocols is significantly improved on Python 3.12,
# so we backport the 3.12 version of Protocol to Python <=3.11
if sys.version_info >= (3, 12):
    Protocol = typing.Protocol
else:
    def _allow_reckless_class_checks(depth=3):
        """Allow instance and class checks for special stdlib modules.

        The abc and functools modules indiscriminately call isinstance() and
        issubclass() on the whole MRO of a user class, which may contain protocols.

        Returns True when ``_caller(depth)`` reports ``'abc'``, ``'functools'``
        or ``None``.  The result depends on how many frames separate this
        call from the code being identified, so callers must not add or
        remove intermediate function calls without adjusting *depth*.
        """
        return _caller(depth) in {'abc', 'functools', None}

    # Installed as ``__init__`` on protocol classes by Protocol.__init_subclass__
    # so that instantiating a protocol class raises TypeError.
    def _no_init(self, *args, **kwargs):
        if type(self)._is_protocol:
            raise TypeError('Protocols cannot be instantiated')

    if sys.version_info >= (3, 8):
        # Inheriting from typing._ProtocolMeta isn't actually desirable,
        # but is necessary to allow typing.Protocol and typing_extensions.Protocol
        # to mix without getting TypeErrors about "metaclass conflict"
        _typing_Protocol = typing.Protocol
        _ProtocolMetaBase = type(_typing_Protocol)
    else:
        # NOTE(review): _marker is defined elsewhere in this file and is
        # presumably a unique sentinel that never matches a real base -- confirm.
        _typing_Protocol = _marker
        _ProtocolMetaBase = abc.ABCMeta

    class _ProtocolMeta(_ProtocolMetaBase):
        # This metaclass is somewhat unfortunate,
        # but is necessary for several reasons...
        #
        # NOTE: DO NOT call super() in any methods in this class
        # That would call the methods on typing._ProtocolMeta on Python 3.8-3.11
        # and those are slow
        def __new__(mcls, name, bases, namespace, **kwargs):
            # A class literally named "Protocol" with fewer than two bases is
            # exempt from the base-class validation below.
            if name == "Protocol" and len(bases) < 2:
                pass
            elif {Protocol, _typing_Protocol} & set(bases):
                # A direct Protocol base is present: every base must be an
                # allowed builtin/typing class, an allow-listed class, or
                # itself a protocol.
                for base in bases:
                    if not (
                        base in {object, typing.Generic, Protocol, _typing_Protocol}
                        or base.__name__ in _PROTO_ALLOWLIST.get(base.__module__, [])
                        or is_protocol(base)
                    ):
                        raise TypeError(
                            f"Protocols can only inherit from other protocols, "
                            f"got {base!r}"
                        )
            return abc.ABCMeta.__new__(mcls, name, bases, namespace, **kwargs)

        def __init__(cls, *args, **kwargs):
            abc.ABCMeta.__init__(cls, *args, **kwargs)
            if getattr(cls, "_is_protocol", False):
                # Cache the member names once per class; the instance and
                # subclass checks below read this attribute.
                cls.__protocol_attrs__ = _get_protocol_attrs(cls)
                # PEP 544 prohibits using issubclass()
                # with protocols that have non-method members.
                cls.__callable_proto_members_only__ = all(
                    callable(getattr(cls, attr, None)) for attr in cls.__protocol_attrs__
                )

        def __subclasscheck__(cls, other):
            if cls is Protocol:
                return type.__subclasscheck__(cls, other)
            # The validation below is skipped for non-protocol classes and when
            # _allow_reckless_class_checks() returns True.
            if (
                getattr(cls, '_is_protocol', False)
                and not _allow_reckless_class_checks()
            ):
                if not isinstance(other, type):
                    # Same error message as for issubclass(1, int).
                    raise TypeError('issubclass() arg 1 must be a class')
                # Only enforced while the class still uses the default
                # _proto_hook as its __subclasshook__.
                if (
                    not cls.__callable_proto_members_only__
                    and cls.__dict__.get("__subclasshook__") is _proto_hook
                ):
                    raise TypeError(
                        "Protocols with non-method members don't support issubclass()"
                    )
                if not getattr(cls, '_is_runtime_protocol', False):
                    raise TypeError(
                        "Instance and class checks can only be used with "
                        "@runtime_checkable protocols"
                    )
            return abc.ABCMeta.__subclasscheck__(cls, other)

        def __instancecheck__(cls, instance):
            # We need this method for situations where attributes are
            # assigned in __init__.
            if cls is Protocol:
                return type.__instancecheck__(cls, instance)
            if not getattr(cls, "_is_protocol", False):
                # i.e., it's a concrete subclass of a protocol
                return abc.ABCMeta.__instancecheck__(cls, instance)

            if (
                not getattr(cls, '_is_runtime_protocol', False) and
                not _allow_reckless_class_checks()
            ):
                raise TypeError("Instance and class checks can only be used with"
                                " @runtime_checkable protocols")

            # Nominal check first; fall back to the structural scan below.
            if abc.ABCMeta.__instancecheck__(cls, instance):
                return True

            for attr in cls.__protocol_attrs__:
                try:
                    # getattr_static looks the attribute up without invoking
                    # descriptors or __getattr__ on the instance.
                    val = inspect.getattr_static(instance, attr)
                except AttributeError:
                    break
                # A member set to None on the instance does not satisfy a
                # callable protocol member.
                if val is None and callable(getattr(cls, attr, None)):
                    break
            else:
                # No ``break``: every protocol member was found.
                return True

            return False

        def __eq__(cls, other):
            # Hack so that typing.Generic.__class_getitem__
            # treats typing_extensions.Protocol
            # as equivalent to typing.Protocol on Python 3.8+
            if abc.ABCMeta.__eq__(cls, other) is True:
                return True
            return (
                cls is Protocol and other is getattr(typing, "Protocol", object())
            )

        # This has to be defined, or the abc-module cache
        # complains about classes with this metaclass being unhashable,
        # if we define only __eq__!
        def __hash__(cls) -> int:
            return type.__hash__(cls)

    # Default ``__subclasshook__`` assigned to protocol subclasses by
    # Protocol.__init_subclass__.  Returns True when every protocol member is
    # found somewhere in ``other.__mro__``, NotImplemented otherwise.
    @classmethod
    def _proto_hook(cls, other):
        if not cls.__dict__.get('_is_protocol', False):
            return NotImplemented

        for attr in cls.__protocol_attrs__:
            for base in other.__mro__:
                # Check if the members appears in the class dictionary...
                if attr in base.__dict__:
                    # A member explicitly set to None counts as "not implemented".
                    if base.__dict__[attr] is None:
                        return NotImplemented
                    break

                # ...or in annotations, if it is a sub-protocol.
                annotations = getattr(base, '__annotations__', {})
                if (
                    isinstance(annotations, collections.abc.Mapping)
                    and attr in annotations
                    and is_protocol(other)
                ):
                    break
            else:
                # The member was not found on any class in the MRO.
                return NotImplemented
        return True

    if sys.version_info >= (3, 8):
        class Protocol(typing.Generic, metaclass=_ProtocolMeta):
            __doc__ = typing.Protocol.__doc__
            __slots__ = ()
            _is_protocol = True
            _is_runtime_protocol = False

            def __init_subclass__(cls, *args, **kwargs):
                super().__init_subclass__(*args, **kwargs)

                # Determine if this is a protocol or a concrete subclass.
                if not cls.__dict__.get('_is_protocol', False):
                    cls._is_protocol = any(b is Protocol for b in cls.__bases__)

                # Set (or override) the protocol subclass hook.
                if '__subclasshook__' not in cls.__dict__:
                    cls.__subclasshook__ = _proto_hook

                # Prohibit instantiation for protocol classes
                if cls._is_protocol and cls.__init__ is Protocol.__init__:
                    cls.__init__ = _no_init

    else:
        class Protocol(metaclass=_ProtocolMeta):
            # There is quite a lot of overlapping code with typing.Generic.
            # Unfortunately it is hard to avoid this on Python <3.8,
            # as the typing module on Python 3.7 doesn't let us subclass typing.Generic!
            """Base class for protocol classes. Protocol classes are defined as::

                class Proto(Protocol):
                    def meth(self) -> int:
                        ...

            Such classes are primarily used with static type checkers that recognize
            structural subtyping (static duck-typing), for example::

                class C:
                    def meth(self) -> int:
                        return 0

                def func(x: Proto) -> int:
                    return x.meth()

                func(C())  # Passes static type check

            See PEP 544 for details. Protocol classes decorated with
            @typing_extensions.runtime_checkable act
            as simple-minded runtime-checkable protocols that check
            only the presence of given attributes, ignoring their type signatures.

            Protocol classes can be generic, they are defined as::

                class GenProto(Protocol[T]):
                    def meth(self) -> T:
                        ...
            """
            __slots__ = ()
            _is_protocol = True
            _is_runtime_protocol = False

            def __new__(cls, *args, **kwds):
                if cls is Protocol:
                    raise TypeError("Type Protocol cannot be instantiated; "
                                    "it can only be used as a base class")
                return super().__new__(cls)

            @typing._tp_cache
            def __class_getitem__(cls, params):
                # Normalise a single parameter to a 1-tuple.
                if not isinstance(params, tuple):
                    params = (params,)
                if not params and cls is not typing.Tuple:
                    raise TypeError(
                        f"Parameter list to {cls.__qualname__}[...] cannot be empty")
                msg = "Parameters to generic types must be types."
                params = tuple(typing._type_check(p, msg) for p in params)
                if cls is Protocol:
                    # Generic can only be subscripted with unique type variables.
                    if not all(isinstance(p, typing.TypeVar) for p in params):
                        # Find the first offending parameter for the message.
                        i = 0
                        while isinstance(params[i], typing.TypeVar):
                            i += 1
                        raise TypeError(
                            "Parameters to Protocol[...] must all be type variables."
                            f" Parameter {i + 1} is {params[i]}")
                    if len(set(params)) != len(params):
                        raise TypeError(
                            "Parameters to Protocol[...] must all be unique")
                else:
                    # Subscripting a regular Generic subclass.
                    _check_generic(cls, params, len(cls.__parameters__))
                return typing._GenericAlias(cls, params)

            def __init_subclass__(cls, *args, **kwargs):
                # Reject a bare ``typing.Generic`` base, looking at the
                # original bases when they were recorded.
                if '__orig_bases__' in cls.__dict__:
                    error = typing.Generic in cls.__orig_bases__
                else:
                    error = typing.Generic in cls.__bases__
                if error:
                    raise TypeError("Cannot inherit from plain Generic")
                _maybe_adjust_parameters(cls)

                # Determine if this is a protocol or a concrete subclass.
                if not cls.__dict__.get('_is_protocol', None):
                    cls._is_protocol = any(b is Protocol for b in cls.__bases__)

                # Set (or override) the protocol subclass hook.
                if '__subclasshook__' not in cls.__dict__:
                    cls.__subclasshook__ = _proto_hook

                # Prohibit instantiation for protocol classes
                if cls._is_protocol and cls.__init__ is Protocol.__init__:
                    cls.__init__ = _no_init
if sys.version_info >= (3, 8):
    runtime_checkable = typing.runtime_checkable
else:
    def runtime_checkable(cls):
        """Flag a protocol class as usable with isinstance() and issubclass().

        Raises TypeError when applied to something that is not a protocol
        class.  The resulting check is a simple-minded structural one, much
        like the one-offs in collections.abc such as Hashable.
        """
        # Evaluated left to right; the ``_is_protocol`` lookup only happens
        # when one of the first two tests succeeded.
        is_protocol_class = (
            (isinstance(cls, _ProtocolMeta) or issubclass(cls, typing.Generic))
            and getattr(cls, "_is_protocol", False)
        )
        if not is_protocol_class:
            raise TypeError('@runtime_checkable can be only applied to protocol classes,'
                            f' got {cls!r}')
        cls._is_runtime_protocol = True
        return cls


# Exists for backwards compatibility.
runtime = runtime_checkable
# Our version of runtime-checkable protocols is faster on Python 3.7-3.11
if sys.version_info >= (3, 12):
SupportsInt = typing.SupportsInt
SupportsFloat = typing.SupportsFloat
SupportsComplex = typing.SupportsComplex
SupportsBytes = typing.SupportsBytes
SupportsIndex = typing.SupportsIndex
SupportsAbs = typing.SupportsAbs
SupportsRound = typing.SupportsRound
else:
@runtime_checkable
class SupportsInt(Protocol):
"""An ABC with one abstract method __int__."""
__slots__ = ()
@abc.abstractmethod
def __int__(self) -> int:
pass
@runtime_checkable
class SupportsFloat(Protocol):
"""An ABC with one abstract method __float__."""
__slots__ = ()
@abc.abstractmethod
def __float__(self) -> float:
pass
@runtime_checkable
class SupportsComplex(Protocol):
"""An ABC with one abstract method __complex__."""
__slots__ = ()
@abc.abstractmethod
def __complex__(self) -> complex:
pass
@runtime_checkable
class SupportsBytes(Protocol):
"""An ABC with one abstract method __bytes__."""
__slots__ = ()
@abc.abstractmethod
def __bytes__(self) -> bytes:
pass
@runtime_checkable
class SupportsIndex(Protocol):
__slots__ = ()
@abc.abstractmethod
def __index__(self) -> int:
pass
@runtime_checkable
class SupportsAbs(Protocol[T_co]):
"""
An ABC with one abstract method __abs__ that is covariant in its return type.
"""
__slots__ = ()
@abc.abstractmethod
def __abs__(self) -> T_co:
pass
@runtime_checkable
class SupportsRound(Protocol[T_co]):
"""
An ABC with one abstract method __round__ that is covariant in its return type.
"""
__slots__ = ()
@abc.abstractmethod
def __round__(self, ndigits: int = 0) -> T_co:
pass
def _ensure_subclassable(mro_entries):
def inner(func):
if sys.implementation.name == "pypy" and sys.version_info < (3, 9):
cls_dict = {
"__call__": staticmethod(func),
"__mro_entries__": staticmethod(mro_entries)
}
t = type(func.__name__, (), cls_dict)
return functools.update_wrapper(t(), func)
else:
func.__mro_entries__ = mro_entries
return func
return inner
if sys.version_info >= (3, 13):
    # The comments below explain why the backport in the ``else`` branch is
    # used on every earlier version:
    #
    # The standard library TypedDict in Python 3.8 does not store runtime information
    # about which (if any) keys are optional.  See https://bugs.python.org/issue38834
    # The standard library TypedDict in Python 3.9.0/1 does not honour the "total"
    # keyword with old-style TypedDict().  See https://bugs.python.org/issue42059
    # The standard library TypedDict below Python 3.11 does not store runtime
    # information about optional and required keys when using Required or NotRequired.
    # Generic TypedDicts are also impossible using typing.TypedDict on Python <3.11.
    # Aaaand on 3.12 we add __orig_bases__ to TypedDict
    # to enable better runtime introspection.
    # On 3.13 we deprecate some odd ways of creating TypedDicts.
    TypedDict = typing.TypedDict
    _TypedDictMeta = typing._TypedDictMeta
    is_typeddict = typing.is_typeddict
else:
    # 3.10.0 and later
    # True when typing._type_check() accepts a ``module`` keyword argument.
    _TAKES_MODULE = "module" in inspect.signature(typing._type_check).parameters

    # Name temporarily given to the class while it is created; see the
    # comment inside _TypedDictMeta.__new__ for why.
    if sys.version_info >= (3, 8):
        _fake_name = "Protocol"
    else:
        _fake_name = "_Protocol"

    class _TypedDictMeta(type):
        def __new__(cls, name, bases, ns, total=True):
            """Create new typed dict class object.

            This method is called when TypedDict is subclassed,
            or when TypedDict is instantiated. This way
            TypedDict supports all three syntax forms described in its docstring.
            Subclasses and instances of TypedDict return actual dictionaries.
            """
            # Every base must itself be a typed dict class or typing.Generic.
            for base in bases:
                if type(base) is not _TypedDictMeta and base is not typing.Generic:
                    raise TypeError('cannot inherit from both a TypedDict type '
                                    'and a non-TypedDict base class')

            if any(issubclass(b, typing.Generic) for b in bases):
                generic_base = (typing.Generic,)
            else:
                generic_base = ()

            # typing.py generally doesn't let you inherit from plain Generic, unless
            # the name of the class happens to be "Protocol" (or "_Protocol" on 3.7).
            tp_dict = type.__new__(_TypedDictMeta, _fake_name, (*generic_base, dict), ns)
            # Restore the real name now that the class object exists.
            tp_dict.__name__ = name
            if tp_dict.__qualname__ == _fake_name:
                tp_dict.__qualname__ = name

            if not hasattr(tp_dict, '__orig_bases__'):
                tp_dict.__orig_bases__ = bases

            annotations = {}
            own_annotations = ns.get('__annotations__', {})
            msg = "TypedDict('Name', {f0: t0, f1: t1, ...}); each t must be a type"
            # Validate/normalise this class's own annotations.
            if _TAKES_MODULE:
                own_annotations = {
                    n: typing._type_check(tp, msg, module=tp_dict.__module__)
                    for n, tp in own_annotations.items()
                }
            else:
                own_annotations = {
                    n: typing._type_check(tp, msg)
                    for n, tp in own_annotations.items()
                }
            required_keys = set()
            optional_keys = set()

            # Start from what the bases declare; own annotations are applied
            # afterwards and therefore take precedence in ``annotations``.
            for base in bases:
                annotations.update(base.__dict__.get('__annotations__', {}))
                required_keys.update(base.__dict__.get('__required_keys__', ()))
                optional_keys.update(base.__dict__.get('__optional_keys__', ()))

            annotations.update(own_annotations)
            for annotation_key, annotation_type in own_annotations.items():
                annotation_origin = get_origin(annotation_type)
                # Look through one Annotated[...] wrapper to find a
                # Required/NotRequired marker underneath it.
                if annotation_origin is Annotated:
                    annotation_args = get_args(annotation_type)
                    if annotation_args:
                        annotation_type = annotation_args[0]
                        annotation_origin = get_origin(annotation_type)

                # An explicit marker wins; otherwise ``total`` decides.
                if annotation_origin is Required:
                    required_keys.add(annotation_key)
                elif annotation_origin is NotRequired:
                    optional_keys.add(annotation_key)
                elif total:
                    required_keys.add(annotation_key)
                else:
                    optional_keys.add(annotation_key)

            tp_dict.__annotations__ = annotations
            tp_dict.__required_keys__ = frozenset(required_keys)
            tp_dict.__optional_keys__ = frozenset(optional_keys)
            if not hasattr(tp_dict, '__total__'):
                tp_dict.__total__ = total
            return tp_dict

        __call__ = dict  # static method

        def __subclasscheck__(cls, other):
            # Typed dicts are only for static structural subtyping.
            raise TypeError('TypedDict does not support instance and class checks')

        # isinstance() is rejected the same way as issubclass().
        __instancecheck__ = __subclasscheck__

    # Base class substituted (through __mro_entries__ below) whenever
    # ``TypedDict`` appears in a class statement's bases.
    _TypedDict = type.__new__(_TypedDictMeta, 'TypedDict', (), {})

    @_ensure_subclassable(lambda bases: (_TypedDict,))
    def TypedDict(__typename, __fields=_marker, *, total=True, **kwargs):
        """A simple typed namespace. At runtime it is equivalent to a plain dict.

        TypedDict creates a dictionary type such that a type checker will expect all
        instances to have a certain set of keys, where each key is
        associated with a value of a consistent type. This expectation
        is not checked at runtime.

        Usage::

            class Point2D(TypedDict):
                x: int
                y: int
                label: str

            a: Point2D = {'x': 1, 'y': 2, 'label': 'good'}  # OK
            b: Point2D = {'z': 3, 'label': 'bad'}           # Fails type check

            assert Point2D(x=1, y=2, label='first') == dict(x=1, y=2, label='first')

        The type info can be accessed via the Point2D.__annotations__ dict, and
        the Point2D.__required_keys__ and Point2D.__optional_keys__ frozensets.
        TypedDict supports an additional equivalent form::

            Point2D = TypedDict('Point2D', {'x': int, 'y': int, 'label': str})

        By default, all keys must be present in a TypedDict. It is possible
        to override this by specifying totality::

            class Point2D(TypedDict, total=False):
                x: int
                y: int

        This means that a Point2D TypedDict can have any of the keys omitted. A type
        checker is only expected to support a literal False or True as the value of
        the total argument. True is the default, and makes all items defined in the
        class body be required.

        The Required and NotRequired special forms can also be used to mark
        individual keys as being required or not required::

            class Point2D(TypedDict):
                x: int  # the "x" key must always be present (Required is the default)
                y: NotRequired[int]  # the "y" key can be omitted

        See PEP 655 for more details on Required and NotRequired.
        """
        if __fields is _marker or __fields is None:
            if __fields is _marker:
                deprecated_thing = "Failing to pass a value for the 'fields' parameter"
            else:
                deprecated_thing = "Passing `None` as the 'fields' parameter"

            example = f"`{__typename} = TypedDict({__typename!r}, {{}})`"
            deprecation_msg = (
                f"{deprecated_thing} is deprecated and will be disallowed in "
                "Python 3.15. To create a TypedDict class with 0 fields "
                "using the functional syntax, pass an empty dictionary, e.g. "
            ) + example + "."
            warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=2)
            # With no fields mapping given, the keyword arguments are the fields.
            __fields = kwargs
        elif kwargs:
            raise TypeError("TypedDict takes either a dict or keyword arguments,"
                            " but not both")

        if kwargs:
            warnings.warn(
                "The kwargs-based syntax for TypedDict definitions is deprecated "
                "in Python 3.11, will be removed in Python 3.13, and may not be "
                "understood by third-party type checkers.",
                DeprecationWarning,
                stacklevel=2,
            )

        ns = {'__annotations__': dict(__fields)}
        # _caller() uses its default depth here, so it must be called directly
        # from this function body.
        module = _caller()
        if module is not None:
            # Setting correct module is necessary to make typed dict classes pickleable.
            ns['__module__'] = module

        td = _TypedDictMeta(__typename, (), ns, total=total)
        td.__orig_bases__ = (TypedDict,)
        return td

    # Metaclasses accepted by is_typeddict(): ours, plus the stdlib one when present.
    if hasattr(typing, "_TypedDictMeta"):
        _TYPEDDICT_TYPES = (typing._TypedDictMeta, _TypedDictMeta)
    else:
        _TYPEDDICT_TYPES = (_TypedDictMeta,)

    def is_typeddict(tp):
        """Check if an annotation is a TypedDict class

        For example::

            class Film(TypedDict):
                title: str
                year: int

            is_typeddict(Film)  # => True
            is_typeddict(Union[list, str])  # => False
        """
        # On 3.8, this would otherwise return True
        if hasattr(typing, "TypedDict") and tp is typing.TypedDict:
            return False
        return isinstance(tp, _TYPEDDICT_TYPES)
if hasattr(typing, "assert_type"):
    assert_type = typing.assert_type

else:
    def assert_type(__val, __typ):
        """Tell the type checker that a value has the given type.

        A type checker that sees a call to assert_type() reports an error
        when the value is not of the stated type::

            def greet(name: str) -> None:
                assert_type(name, str)  # ok
                assert_type(name, int)  # type checker error

        At runtime the first argument is handed back untouched and nothing
        else happens.
        """
        return __val
if hasattr(typing, "Required"):
get_type_hints = typing.get_type_hints
else:
# replaces _strip_annotations()
def _strip_extras(t):
"""Strips Annotated, Required and NotRequired from a given type."""
if isinstance(t, _AnnotatedAlias):
return _strip_extras(t.__origin__)
if hasattr(t, "__origin__") and t.__origin__ in (Required, NotRequired):
return _strip_extras(t.__args__[0])
if isinstance(t, typing._GenericAlias):
stripped_args = tuple(_strip_extras(a) for a in t.__args__)
if stripped_args == t.__args__:
return t
return t.copy_with(stripped_args)
if hasattr(_types, "GenericAlias") and isinstance(t, _types.GenericAlias):
stripped_args = tuple(_strip_extras(a) for a in t.__args__)
if stripped_args == t.__args__:
return t
return _types.GenericAlias(t.__origin__, stripped_args)
if hasattr(_types, "UnionType") and isinstance(t, _types.UnionType):
stripped_args = tuple(_strip_extras(a) for a in t.__args__)
if stripped_args == t.__args__:
return t
return functools.reduce(operator.or_, stripped_args)
return t
def get_type_hints(obj, globalns=None, localns=None, include_extras=False):
"""Return type hints for an object.
This is often the same as obj.__annotations__, but it handles
forward references encoded as string literals, adds Optional[t] if a
default value equal to None is set and recursively replaces all
'Annotated[T, ...]', 'Required[T]' or 'NotRequired[T]' with 'T'
(unless 'include_extras=True').
The argument may be a module, class, method, or function. The annotations
are returned as a dictionary. For classes, annotations include also
inherited members.
TypeError is raised if the argument is not of a type that can contain
annotations, and an empty dictionary is returned if no annotations are
present.
BEWARE -- the behavior of globalns and localns is counterintuitive
(unless you are familiar with how eval() and exec() work). The
search order is locals first, then globals.
- If no dict arguments are passed, an attempt is made to use the
globals from obj (or the respective module's globals for classes),
and these are also used as the locals. If the object does not appear
to have globals, an empty dictionary is used.
- If one dict argument is passed, it is used for both globals and
locals.
- If two dict arguments are passed, they specify globals and
locals, respectively.
"""
if hasattr(typing, "Annotated"):
hint = typing.get_type_hints(
obj, globalns=globalns, localns=localns, include_extras=True
)
else:
hint = typing.get_type_hints(obj, globalns=globalns, localns=localns)
if include_extras:
return hint
return {k: _strip_extras(t) for k, t in hint.items()}
# Python 3.9+ has PEP 593 (Annotated)
if hasattr(typing, 'Annotated'):
Annotated = typing.Annotated
# Not exported and not a public API, but needed for get_origin() and get_args()
# to work.
_AnnotatedAlias = typing._AnnotatedAlias
# 3.7-3.8
else:
class _AnnotatedAlias(typing._GenericAlias, _root=True):
"""Runtime representation of an annotated type.
At its core 'Annotated[t, dec1, dec2, ...]' is an alias for the type 't'
with extra annotations. The alias behaves like a normal typing alias,
instantiating is the same as instantiating the underlying type, binding
it to types is also the same.
"""
def __init__(self, origin, metadata):
if isinstance(origin, _AnnotatedAlias):
metadata = origin.__metadata__ + metadata
origin = origin.__origin__
super().__init__(origin, origin)
self.__metadata__ = metadata
def copy_with(self, params):
assert len(params) == 1
new_type = params[0]
return _AnnotatedAlias(new_type, self.__metadata__)
def __repr__(self):
return (f"typing_extensions.Annotated[{typing._type_repr(self.__origin__)}, "
f"{', '.join(repr(a) for a in self.__metadata__)}]")
def __reduce__(self):
return operator.getitem, (
Annotated, (self.__origin__,) + self.__metadata__
)
def __eq__(self, other):
if not isinstance(other, _AnnotatedAlias):
return NotImplemented
if self.__origin__ != other.__origin__:
return False
return self.__metadata__ == other.__metadata__
def __hash__(self):
return hash((self.__origin__, self.__metadata__))
class Annotated:
"""Add context specific metadata to a type.
Example: Annotated[int, runtime_check.Unsigned] indicates to the
hypothetical runtime_check module that this type is an unsigned int.
Every other consumer of this type can ignore this metadata and treat
this type as int.
The first argument to Annotated must be a valid type (and will be in
the __origin__ field), the remaining arguments are kept as a tuple in
the __extra__ field.
Details:
- It's an error to call `Annotated` with less than two arguments.
- Nested Annotated are flattened::
Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3]
- Instantiating an annotated type is equivalent to instantiating the
underlying type::
Annotated[C, Ann1](5) == C(5)
- Annotated can be used as a generic type alias::
Optimized = Annotated[T, runtime.Optimize()]
Optimized[int] == Annotated[int, runtime.Optimize()]
OptimizedList = Annotated[List[T], runtime.Optimize()]
OptimizedList[int] == Annotated[List[int], runtime.Optimize()]
"""
__slots__ = ()
def __new__(cls, *args, **kwargs):
raise TypeError("Type Annotated cannot be instantiated.")
@typing._tp_cache
def __class_getitem__(cls, params):
if not isinstance(params, tuple) or len(params) < 2:
raise TypeError("Annotated[...] should be used "
"with at least two arguments (a type and an "
"annotation).")
allowed_special_forms = (ClassVar, Final)
if get_origin(params[0]) in allowed_special_forms:
origin = params[0]
else:
msg = "Annotated[t, ...]: t must be a type."
origin = typing._type_check(params[0], msg)
metadata = tuple(params[1:])
return _AnnotatedAlias(origin, metadata)
def __init_subclass__(cls, *args, **kwargs):
raise TypeError(
f"Cannot subclass {cls.__module__}.Annotated"
)
# Python 3.8 has get_origin() and get_args() but those implementations aren't
# Annotated-aware, so we can't use those. Python 3.9's versions don't support
# ParamSpecArgs and ParamSpecKwargs, so only Python 3.10's versions will do.
if sys.version_info[:2] >= (3, 10):
get_origin = typing.get_origin
get_args = typing.get_args
# 3.7-3.9
else:
try:
# 3.9+
from typing import _BaseGenericAlias
except ImportError:
_BaseGenericAlias = typing._GenericAlias
try:
# 3.9+
from typing import GenericAlias as _typing_GenericAlias
except ImportError:
_typing_GenericAlias = typing._GenericAlias
def get_origin(tp):
"""Get the unsubscripted version of a type.
This supports generic types, Callable, Tuple, Union, Literal, Final, ClassVar
and Annotated. Return None for unsupported types. Examples::
get_origin(Literal[42]) is Literal
get_origin(int) is None
get_origin(ClassVar[int]) is ClassVar
get_origin(Generic) is Generic
get_origin(Generic[T]) is Generic
get_origin(Union[T, int]) is Union
get_origin(List[Tuple[T, T]][int]) == list
get_origin(P.args) is P
"""
if isinstance(tp, _AnnotatedAlias):
return Annotated
if isinstance(tp, (typing._GenericAlias, _typing_GenericAlias, _BaseGenericAlias,
ParamSpecArgs, ParamSpecKwargs)):
return tp.__origin__
if tp is typing.Generic:
return typing.Generic
return None
def get_args(tp):
"""Get type arguments with all substitutions performed.
For unions, basic simplifications used by Union constructor are performed.
Examples::
get_args(Dict[str, int]) == (str, int)
get_args(int) == ()
get_args(Union[int, Union[T, int], str][int]) == (int, str)
get_args(Union[int, Tuple[T, int]][str]) == (int, Tuple[str, int])
get_args(Callable[[], T][int]) == ([], int)
"""
if isinstance(tp, _AnnotatedAlias):
return (tp.__origin__,) + tp.__metadata__
if isinstance(tp, (typing._GenericAlias, _typing_GenericAlias)):
if getattr(tp, "_special", False):
return ()
res = tp.__args__
if get_origin(tp) is collections.abc.Callable and res[0] is not Ellipsis:
res = (list(res[:-1]), res[-1])
return res
return ()
# 3.10+
if hasattr(typing, 'TypeAlias'):
TypeAlias = typing.TypeAlias
# 3.9
elif sys.version_info[:2] >= (3, 9):
@_ExtensionsSpecialForm
def TypeAlias(self, parameters):
"""Special marker indicating that an assignment should
be recognized as a proper type alias definition by type
checkers.
For example::
Predicate: TypeAlias = Callable[..., bool]
It's invalid when used anywhere except as in the example above.
"""
raise TypeError(f"{self} is not subscriptable")
# 3.7-3.8
else:
TypeAlias = _ExtensionsSpecialForm(
'TypeAlias',
doc="""Special marker indicating that an assignment should
be recognized as a proper type alias definition by type
checkers.
For example::
Predicate: TypeAlias = Callable[..., bool]
It's invalid when used anywhere except as in the example
above."""
)
def _set_default(type_param, default):
if isinstance(default, (tuple, list)):
type_param.__default__ = tuple((typing._type_check(d, "Default must be a type")
for d in default))
elif default != _marker:
type_param.__default__ = typing._type_check(default, "Default must be a type")
else:
type_param.__default__ = None
def _set_module(typevarlike):
# for pickling:
def_mod = _caller(depth=3)
if def_mod != 'typing_extensions':
typevarlike.__module__ = def_mod
class _DefaultMixin:
    """Mixin giving TypeVar-like classes PEP 696 ``default`` handling.

    ``__init__`` is ``_set_default`` itself, so
    ``_DefaultMixin.__init__(obj, default)`` stores ``obj.__default__``.
    """

    __init__ = _set_default
    # The mixin contributes no instance storage of its own.
    __slots__ = ()
# Classes using this metaclass must provide a _backported_typevarlike ClassVar
class _TypeVarLikeMeta(type):
def __instancecheck__(cls, __instance: Any) -> bool:
return isinstance(__instance, cls._backported_typevarlike)
# Backport of the ``default`` (PEP 696) and ``infer_variance`` (PEP 695)
# parameters for TypeVar.
class TypeVar(metaclass=_TypeVarLikeMeta):
    """Type variable.

    Calling this class returns a real ``typing.TypeVar`` with ``__default__``
    set and, when typing itself does not support ``infer_variance``, with
    ``__infer_variance__`` set by hand.
    """

    _backported_typevarlike = typing.TypeVar

    def __new__(cls, name, *constraints, bound=None,
                covariant=False, contravariant=False,
                default=_marker, infer_variance=False):
        variance_and_bound = dict(bound=bound, covariant=covariant,
                                  contravariant=contravariant)
        if hasattr(typing, "TypeAliasType"):
            # PEP 695 implemented, typing.TypeVar accepts infer_variance itself.
            typevar = typing.TypeVar(name, *constraints,
                                     infer_variance=infer_variance,
                                     **variance_and_bound)
        else:
            typevar = typing.TypeVar(name, *constraints, **variance_and_bound)
            if infer_variance and (covariant or contravariant):
                raise ValueError("Variance cannot be specified with infer_variance.")
            typevar.__infer_variance__ = infer_variance
        _set_default(typevar, default)
        # Must be called from here directly: _set_module inspects the call stack.
        _set_module(typevar)
        return typevar

    def __init_subclass__(cls) -> None:
        raise TypeError(f"type '{__name__}.TypeVar' is not an acceptable base type")
# PEP 612 support ships with typing from Python 3.10 on.
if hasattr(typing, 'ParamSpecArgs'):
    ParamSpecArgs = typing.ParamSpecArgs
    ParamSpecKwargs = typing.ParamSpecKwargs
# 3.7-3.9
else:
    class _Immutable:
        """Mixin marking objects that must not be copied."""
        __slots__ = ()

        def __copy__(self):
            return self

        def __deepcopy__(self, memo):
            return self

    class ParamSpecArgs(_Immutable):
        """The args for a ParamSpec object.

        For a ParamSpec ``P``, ``P.args`` is a ParamSpecArgs instance that
        refers back to its ParamSpec::

            P.args.__origin__ is P

        Intended for runtime introspection only; it has no special meaning to
        static type checkers.
        """

        def __init__(self, origin):
            self.__origin__ = origin

        def __repr__(self):
            return f"{self.__origin__.__name__}.args"

        def __eq__(self, other):
            if isinstance(other, ParamSpecArgs):
                return self.__origin__ == other.__origin__
            return NotImplemented

    class ParamSpecKwargs(_Immutable):
        """The kwargs for a ParamSpec object.

        For a ParamSpec ``P``, ``P.kwargs`` is a ParamSpecKwargs instance that
        refers back to its ParamSpec::

            P.kwargs.__origin__ is P

        Intended for runtime introspection only; it has no special meaning to
        static type checkers.
        """

        def __init__(self, origin):
            self.__origin__ = origin

        def __repr__(self):
            return f"{self.__origin__.__name__}.kwargs"

        def __eq__(self, other):
            if isinstance(other, ParamSpecKwargs):
                return self.__origin__ == other.__origin__
            return NotImplemented
# 3.10+
if hasattr(typing, 'ParamSpec'):

    # Add default parameter - PEP 696
    class ParamSpec(metaclass=_TypeVarLikeMeta):
        """Parameter specification.

        Calling this class returns a real ``typing.ParamSpec`` with
        ``__default__`` set and, when typing itself does not support
        ``infer_variance``, with ``__infer_variance__`` set by hand.
        """

        _backported_typevarlike = typing.ParamSpec

        def __new__(cls, name, *, bound=None,
                    covariant=False, contravariant=False,
                    infer_variance=False, default=_marker):
            variance_and_bound = dict(bound=bound, covariant=covariant,
                                      contravariant=contravariant)
            if hasattr(typing, "TypeAliasType"):
                # PEP 695 implemented, typing.ParamSpec accepts infer_variance.
                paramspec = typing.ParamSpec(name, infer_variance=infer_variance,
                                             **variance_and_bound)
            else:
                paramspec = typing.ParamSpec(name, **variance_and_bound)
                paramspec.__infer_variance__ = infer_variance

            _set_default(paramspec, default)
            # Must be called from here directly: _set_module inspects the stack.
            _set_module(paramspec)
            return paramspec

        def __init_subclass__(cls) -> None:
            raise TypeError(f"type '{__name__}.ParamSpec' is not an acceptable base type")

# 3.7-3.9
else:

    # Inherits from list as a workaround for Callable checks in Python < 3.9.2.
    class ParamSpec(list, _DefaultMixin):
        """Parameter specification variable.

        Usage::

           P = ParamSpec('P')

        Parameter specification variables exist primarily for the benefit of
        static type checkers. They forward the parameter types of one callable
        to another callable, a pattern common in higher order functions and
        decorators. They are only valid when used in ``Concatenate``, or as
        the first argument to ``Callable``. In Python 3.10 and higher, they
        are also supported in user-defined Generics at runtime. See class
        Generic for more information on generic types. An example of
        annotating a decorator::

           T = TypeVar('T')
           P = ParamSpec('P')

           def add_logging(f: Callable[P, T]) -> Callable[P, T]:
               '''A type-safe decorator to add logging to a function.'''
               def inner(*args: P.args, **kwargs: P.kwargs) -> T:
                   logging.info(f'{f.__name__} was called')
                   return f(*args, **kwargs)
               return inner

           @add_logging
           def add_two(x: float, y: float) -> float:
               '''Add two numbers together.'''
               return x + y

        Parameter specification variables defined with covariant=True or
        contravariant=True can be used to declare covariant or contravariant
        generic types. These keyword arguments are valid, but their actual
        semantics are yet to be decided. See PEP 612 for details.

        Parameter specification variables can be introspected, e.g.::

           P.__name__ == 'P'
           P.__bound__ == None
           P.__covariant__ == False
           P.__contravariant__ == False

        Note that only parameter specification variables defined in global
        scope can be pickled.
        """

        # Trick Generic __parameters__.
        __class__ = typing.TypeVar

        @property
        def args(self):
            return ParamSpecArgs(self)

        @property
        def kwargs(self):
            return ParamSpecKwargs(self)

        def __init__(self, name, *, bound=None, covariant=False, contravariant=False,
                     infer_variance=False, default=_marker):
            # The list part of the object contains the ParamSpec itself.
            super().__init__([self])
            self.__name__ = name
            self.__covariant__ = bool(covariant)
            self.__contravariant__ = bool(contravariant)
            self.__infer_variance__ = bool(infer_variance)
            self.__bound__ = (
                typing._type_check(bound, 'Bound must be a type.') if bound else None
            )
            _DefaultMixin.__init__(self, default)

            # for pickling: record the module of whoever created the object.
            # _caller() inspects the stack, so it is called from here directly.
            def_mod = _caller()
            if def_mod != 'typing_extensions':
                self.__module__ = def_mod

        def __repr__(self):
            # Inferred variance takes precedence over explicit variance flags.
            if self.__infer_variance__:
                return '' + self.__name__
            if self.__covariant__:
                return '+' + self.__name__
            if self.__contravariant__:
                return '-' + self.__name__
            return '~' + self.__name__

        def __hash__(self):
            return object.__hash__(self)

        def __eq__(self, other):
            return self is other

        def __reduce__(self):
            return self.__name__

        # Hack to get typing._type_check to pass.
        def __call__(self, *args, **kwargs):
            pass
# 3.7-3.9
if not hasattr(typing, 'Concatenate'):
    # Inherits from list as a workaround for Callable checks in Python < 3.9.2.
    class _ConcatenateGenericAlias(list):

        # Trick Generic into looking into this for __parameters__.
        __class__ = typing._GenericAlias

        # Flag in 3.8.
        _special = False

        def __init__(self, origin, args):
            super().__init__(args)
            self.__origin__ = origin
            self.__args__ = args

        def __repr__(self):
            render = typing._type_repr
            origin_text = render(self.__origin__)
            args_text = ", ".join(render(arg) for arg in self.__args__)
            return f'{origin_text}[{args_text}]'

        def __hash__(self):
            return hash((self.__origin__, self.__args__))

        # Hack to get typing._type_check to pass in Generic.
        def __call__(self, *args, **kwargs):
            pass

        @property
        def __parameters__(self):
            # Only the type variables / ParamSpecs among the arguments.
            variable_types = (typing.TypeVar, ParamSpec)
            return tuple(filter(lambda tp: isinstance(tp, variable_types),
                                self.__args__))
# 3.7-3.9
@typing._tp_cache
def _concatenate_getitem(self, parameters):
    """Build the alias for ``Concatenate[...]`` subscripted with ``parameters``.

    Raises TypeError when no types are given or when the last parameter is
    not a ParamSpec. Every parameter is validated with ``typing._type_check``.
    """
    if parameters == ():
        raise TypeError("Cannot take a Concatenate of no types.")
    if not isinstance(parameters, tuple):
        parameters = (parameters,)
    trailing = parameters[-1]
    if not isinstance(trailing, ParamSpec):
        raise TypeError("The last parameter to Concatenate should be a "
                        "ParamSpec variable.")
    msg = "Concatenate[arg, ...]: each arg must be a type."
    checked = tuple([typing._type_check(item, msg) for item in parameters])
    return _ConcatenateGenericAlias(self, checked)
# 3.10+
if hasattr(typing, 'Concatenate'):
Concatenate = typing.Concatenate
_ConcatenateGenericAlias = typing._ConcatenateGenericAlias # noqa: F811
# 3.9
elif sys.version_info[:2] >= (3, 9):
@_ExtensionsSpecialForm
def Concatenate(self, parameters):
"""Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a
higher order function which adds, removes or transforms parameters of a
callable.
For example::
Callable[Concatenate[int, P], int]
See PEP 612 for detailed information.
"""
return _concatenate_getitem(self, parameters)
# 3.7-8
else:
class _ConcatenateForm(_ExtensionsSpecialForm, _root=True):
def __getitem__(self, parameters):
return _concatenate_getitem(self, parameters)
Concatenate = _ConcatenateForm(
'Concatenate',
doc="""Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a
higher order function which adds, removes or transforms parameters of a
callable.
For example::
Callable[Concatenate[int, P], int]
See PEP 612 for detailed information.
""")
# 3.10+
if hasattr(typing, 'TypeGuard'):
TypeGuard = typing.TypeGuard
# 3.9
elif sys.version_info[:2] >= (3, 9):
@_ExtensionsSpecialForm
def TypeGuard(self, parameters):
"""Special typing form used to annotate the return type of a user-defined
type guard function. ``TypeGuard`` only accepts a single type argument.
At runtime, functions marked this way should return a boolean.
``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static
type checkers to determine a more precise type of an expression within a
program's code flow. Usually type narrowing is done by analyzing
conditional code flow and applying the narrowing to a block of code. The
conditional expression here is sometimes referred to as a "type guard".
Sometimes it would be convenient to use a user-defined boolean function
as a type guard. Such a function should use ``TypeGuard[...]`` as its
return type to alert static type checkers to this intention.
Using ``-> TypeGuard`` tells the static type checker that for a given
function:
1. The return value is a boolean.
2. If the return value is ``True``, the type of its argument
is the type inside ``TypeGuard``.
For example::
def is_str(val: Union[str, float]):
# "isinstance" type guard
if isinstance(val, str):
# Type of ``val`` is narrowed to ``str``
...
else:
# Else, type of ``val`` is narrowed to ``float``.
...
Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower
form of ``TypeA`` (it can even be a wider form) and this may lead to
type-unsafe results. The main reason is to allow for things like
narrowing ``List[object]`` to ``List[str]`` even though the latter is not
a subtype of the former, since ``List`` is invariant. The responsibility of
writing type-safe type guards is left to the user.
``TypeGuard`` also works with type variables. For more information, see
PEP 647 (User-Defined Type Guards).
"""
item = typing._type_check(parameters, f'{self} accepts only a single type.')
return typing._GenericAlias(self, (item,))
# 3.7-3.8
else:
class _TypeGuardForm(_ExtensionsSpecialForm, _root=True):
def __getitem__(self, parameters):
item = typing._type_check(parameters,
f'{self._name} accepts only a single type')
return typing._GenericAlias(self, (item,))
TypeGuard = _TypeGuardForm(
'TypeGuard',
doc="""Special typing form used to annotate the return type of a user-defined
type guard function. ``TypeGuard`` only accepts a single type argument.
At runtime, functions marked this way should return a boolean.
``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static
type checkers to determine a more precise type of an expression within a
program's code flow. Usually type narrowing is done by analyzing
conditional code flow and applying the narrowing to a block of code. The
conditional expression here is sometimes referred to as a "type guard".
Sometimes it would be convenient to use a user-defined boolean function
as a type guard. Such a function should use ``TypeGuard[...]`` as its
return type to alert static type checkers to this intention.
Using ``-> TypeGuard`` tells the static type checker that for a given
function:
1. The return value is a boolean.
2. If the return value is ``True``, the type of its argument
is the type inside ``TypeGuard``.
For example::
def is_str(val: Union[str, float]):
# "isinstance" type guard
if isinstance(val, str):
# Type of ``val`` is narrowed to ``str``
...
else:
# Else, type of ``val`` is narrowed to ``float``.
...
Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower
form of ``TypeA`` (it can even be a wider form) and this may lead to
type-unsafe results. The main reason is to allow for things like
narrowing ``List[object]`` to ``List[str]`` even though the latter is not
a subtype of the former, since ``List`` is invariant. The responsibility of
writing type-safe type guards is left to the user.
``TypeGuard`` also works with type variables. For more information, see
PEP 647 (User-Defined Type Guards).
""")
# Vendored from cpython typing._SpecialFrom
class _SpecialForm(typing._Final, _root=True):
__slots__ = ('_name', '__doc__', '_getitem')
def __init__(self, getitem):
self._getitem = getitem
self._name = getitem.__name__
self.__doc__ = getitem.__doc__
def __getattr__(self, item):
if item in {'__name__', '__qualname__'}:
return self._name
raise AttributeError(item)
def __mro_entries__(self, bases):
raise TypeError(f"Cannot subclass {self!r}")
def __repr__(self):
return f'typing_extensions.{self._name}'
def __reduce__(self):
return self._name
def __call__(self, *args, **kwds):
raise TypeError(f"Cannot instantiate {self!r}")
def __or__(self, other):
return typing.Union[self, other]
def __ror__(self, other):
return typing.Union[other, self]
def __instancecheck__(self, obj):
raise TypeError(f"{self} cannot be used with isinstance()")
def __subclasscheck__(self, cls):
raise TypeError(f"{self} cannot be used with issubclass()")
@typing._tp_cache
def __getitem__(self, parameters):
return self._getitem(self, parameters)
# Reuse typing.LiteralString when the running Python provides it.
if hasattr(typing, "LiteralString"):
    LiteralString = typing.LiteralString
else:
    @_SpecialForm
    def LiteralString(self, params):
        """Represents an arbitrary literal string.

        Example::

          from metaflow._vendor.v3_7.typing_extensions import LiteralString

          def query(sql: LiteralString) -> ...:
              ...

          query("SELECT * FROM table")  # ok
          query(f"SELECT * FROM {input()}")  # not ok

        Subscripting this form always raises TypeError.

        See PEP 675 for details.
        """
        raise TypeError(f"{self} is not subscriptable")
# Reuse typing.Self when the running Python provides it.
if hasattr(typing, "Self"):
    Self = typing.Self
else:
    @_SpecialForm
    def Self(self, params):
        """Used to spell the type of "self" in classes.

        Example::

          from typing import Self

          class ReturnsSelf:
              def parse(self, data: bytes) -> Self:
                  ...
                  return self

        Subscripting this form always raises TypeError.
        """
        raise TypeError(f"{self} is not subscriptable")
# Reuse typing.Never when the running Python provides it.
if hasattr(typing, "Never"):
    Never = typing.Never
else:
    @_SpecialForm
    def Never(self, params):
        """The bottom type, a type that has no members.

        Use it for a function that should never be called, or a function
        that never returns::

            from metaflow._vendor.v3_7.typing_extensions import Never

            def never_call_me(arg: Never) -> None:
                pass

            def int_or_str(arg: int | str) -> None:
                never_call_me(arg)  # type checker error
                match arg:
                    case int():
                        print("It's an int")
                    case str():
                        print("It's a str")
                    case _:
                        never_call_me(arg)  # ok, arg is of type Never

        Subscripting this form always raises TypeError.
        """
        raise TypeError(f"{self} is not subscriptable")
# Reuse typing.Required / typing.NotRequired when typing provides them.
if hasattr(typing, 'Required'):
    Required = typing.Required
    NotRequired = typing.NotRequired
# 3.9: special forms can be built from a decorated function.
elif sys.version_info[:2] >= (3, 9):
    @_ExtensionsSpecialForm
    def Required(self, parameters):
        """Mark a key of a ``total=False`` TypedDict as required.

        For example:

            class Movie(TypedDict, total=False):
                title: Required[str]
                year: int

            m = Movie(
                title='The Matrix',  # typechecker error if key is omitted
                year=1999,
            )

        There is no runtime checking that a required key is actually provided
        when instantiating a related TypedDict.
        """
        item = typing._type_check(parameters, f'{self._name} accepts only a single type.')
        return typing._GenericAlias(self, (item,))

    @_ExtensionsSpecialForm
    def NotRequired(self, parameters):
        """Mark a key of a TypedDict as potentially missing.

        For example:

            class Movie(TypedDict):
                title: str
                year: NotRequired[int]

            m = Movie(
                title='The Matrix',  # typechecker error if key is omitted
                year=1999,           # may be left out
            )
        """
        item = typing._type_check(parameters, f'{self._name} accepts only a single type.')
        return typing._GenericAlias(self, (item,))

# Older versions: special forms are built from a name and a doc string.
else:
    class _RequiredForm(_ExtensionsSpecialForm, _root=True):
        def __getitem__(self, parameters):
            item = typing._type_check(parameters,
                                      f'{self._name} accepts only a single type.')
            return typing._GenericAlias(self, (item,))

    Required = _RequiredForm(
        'Required',
        doc="""A special typing construct to mark a key of a total=False TypedDict
as required. For example:
class Movie(TypedDict, total=False):
title: Required[str]
year: int
m = Movie(
title='The Matrix', # typechecker error if key is omitted
year=1999,
)
There is no runtime checking that a required key is actually provided
when instantiating a related TypedDict.
""")
    NotRequired = _RequiredForm(
        'NotRequired',
        doc="""A special typing construct to mark a key of a TypedDict as
potentially missing. For example:
class Movie(TypedDict):
title: str
year: NotRequired[int]
m = Movie(
title='The Matrix', # typechecker error if key is omitted
year=1999,
)
""")
# Documentation text shared by the hand-built Unpack forms below.
_UNPACK_DOC = """\
Type unpack operator.
The type unpack operator takes the child types from some container type,
such as `tuple[int, str]` or a `TypeVarTuple`, and 'pulls them out'. For
example:
# For some generic class `Foo`:
Foo[Unpack[tuple[int, str]]] # Equivalent to Foo[int, str]
Ts = TypeVarTuple('Ts')
# Specifies that `Bar` is generic in an arbitrary number of types.
# (Think of `Ts` as a tuple of an arbitrary number of individual
# `TypeVar`s, which the `Unpack` is 'pulling out' directly into the
# `Generic[]`.)
class Bar(Generic[Unpack[Ts]]): ...
Bar[int] # Valid
Bar[int, str] # Also valid
From Python 3.11, this can also be done using the `*` operator:
Foo[*tuple[int, str]]
class Bar(Generic[*Ts]): ...
The operator can also be used along with a `TypedDict` to annotate
`**kwargs` in a function signature. For instance:
class Movie(TypedDict):
name: str
year: int
# This function expects two keyword arguments - *name* of type `str` and
# *year* of type `int`.
def foo(**kwargs: Unpack[Movie]): ...
Note that there is only some runtime checking of this operator. Not
everything the runtime allows may be accepted by static type checkers.
For more information, see PEP 646 and PEP 692.
"""


if sys.version_info >= (3, 12):  # PEP 692 changed the repr of Unpack[]
    Unpack = typing.Unpack

    def _is_unpack(obj):
        # With the stdlib form, an unpacked type is recognised by its origin.
        return get_origin(obj) is Unpack

elif sys.version_info[:2] >= (3, 9):
    # Special form built from a decorated function; its documentation is
    # replaced by the shared _UNPACK_DOC text.
    class _UnpackSpecialForm(_ExtensionsSpecialForm, _root=True):
        def __init__(self, getitem):
            super().__init__(getitem)
            self.__doc__ = _UNPACK_DOC

    # Alias returned by Unpack[...]; it reports typing.TypeVar as its class.
    class _UnpackAlias(typing._GenericAlias, _root=True):
        __class__ = typing.TypeVar

    @_UnpackSpecialForm
    def Unpack(self, parameters):
        item = typing._type_check(parameters, f'{self._name} accepts only a single type.')
        return _UnpackAlias(self, (item,))

    def _is_unpack(obj):
        return isinstance(obj, _UnpackAlias)

else:
    # Alias returned by Unpack[...]; it reports typing.TypeVar as its class.
    class _UnpackAlias(typing._GenericAlias, _root=True):
        __class__ = typing.TypeVar

    # Special form built from a name and a doc string.
    class _UnpackForm(_ExtensionsSpecialForm, _root=True):
        def __getitem__(self, parameters):
            item = typing._type_check(parameters,
                                      f'{self._name} accepts only a single type.')
            return _UnpackAlias(self, (item,))

    Unpack = _UnpackForm('Unpack', doc=_UNPACK_DOC)

    def _is_unpack(obj):
        return isinstance(obj, _UnpackAlias)
if hasattr(typing, "TypeVarTuple"):  # 3.11+

    # Add default parameter - PEP 696
    class TypeVarTuple(metaclass=_TypeVarLikeMeta):
        """Type variable tuple.

        Calling this class returns a real ``typing.TypeVarTuple`` with
        ``__default__`` set.
        """

        _backported_typevarlike = typing.TypeVarTuple

        def __new__(cls, name, *, default=_marker):
            wrapped = typing.TypeVarTuple(name)
            _set_default(wrapped, default)
            # Must be called from here directly: _set_module inspects the stack.
            _set_module(wrapped)
            return wrapped

        def __init_subclass__(self, *args, **kwds):
            raise TypeError("Cannot subclass special typing classes")

else:
    class TypeVarTuple(_DefaultMixin):
        """Type variable tuple.

        Usage::

            Ts = TypeVarTuple('Ts')

        Just as a normal type variable stands in for a single type such as
        ``int``, a type variable *tuple* stands in for a *tuple* type such as
        ``Tuple[int, str]``.

        Type variable tuples can be used in ``Generic`` declarations.
        Consider the following example::

            class Array(Generic[*Ts]): ...

        Here ``Ts`` behaves like ``tuple[T1, T2]``, where ``T1`` and ``T2``
        are type variables. To use them as type parameters of ``Array`` the
        type variable tuple must be *unpacked* with the star operator:
        ``*Ts``. The signature of ``Array`` then behaves as if it had been
        written ``class Array(Generic[T1, T2]): ...``. Unlike
        ``Generic[T1, T2]``, however, ``Generic[*Shape]`` allows the class to
        be parameterised with an *arbitrary* number of type parameters.

        Type variable tuples can be used anywhere a normal ``TypeVar`` can.
        This includes class definitions, as shown above, as well as function
        signatures and variable annotations::

            class Array(Generic[*Ts]):

                def __init__(self, shape: Tuple[*Ts]):
                    self._shape: Tuple[*Ts] = shape

                def get_shape(self) -> Tuple[*Ts]:
                    return self._shape

            shape = (Height(480), Width(640))
            x: Array[Height, Width] = Array(shape)
            y = abs(x)  # Inferred type is Array[Height, Width]
            z = x + x   #        ... is Array[Height, Width]
            x.get_shape()  #     ... is tuple[Height, Width]
        """

        # Trick Generic __parameters__.
        __class__ = typing.TypeVar

        def __iter__(self):
            # Iterating (e.g. ``*Ts``) yields the single unpacked form.
            yield self.__unpacked__

        def __init__(self, name, *, default=_marker):
            self.__name__ = name
            _DefaultMixin.__init__(self, default)

            # for pickling: record the module of whoever created the object.
            # _caller() inspects the stack, so it is called from here directly.
            def_mod = _caller()
            if def_mod != 'typing_extensions':
                self.__module__ = def_mod

            self.__unpacked__ = Unpack[self]

        def __repr__(self):
            return self.__name__

        def __hash__(self):
            return object.__hash__(self)

        def __eq__(self, other):
            return self is other

        def __reduce__(self):
            return self.__name__

        def __init_subclass__(self, *args, **kwds):
            # Subclassing is only allowed when the ``_root`` keyword is given.
            if '_root' not in kwds:
                raise TypeError("Cannot subclass special typing classes")
# Reuse typing.reveal_type when the running Python provides it.
if hasattr(typing, "reveal_type"):
    reveal_type = typing.reveal_type
else:
    def reveal_type(__obj: T) -> T:
        """Reveal the inferred type of a variable.

        A static type checker that encounters a call to ``reveal_type()``
        emits the inferred type of the argument::

            x: int = 1
            reveal_type(x)

        Running a static type checker (e.g., ``mypy``) on this example
        produces output similar to 'Revealed type is "builtins.int"'.

        At runtime, the function writes the runtime type name of the argument
        to standard error and returns the argument unchanged.
        """
        runtime_name = type(__obj).__name__
        print(f"Runtime type is {runtime_name!r}", file=sys.stderr)
        return __obj
# Reuse typing.assert_never when the running Python provides it.
if hasattr(typing, "assert_never"):
    assert_never = typing.assert_never
else:
    def assert_never(__arg: Never) -> Never:
        """Assert to the type checker that a line of code is unreachable.

        Example::

            def int_or_str(arg: int | str) -> None:
                match arg:
                    case int():
                        print("It's an int")
                    case str():
                        print("It's a str")
                    case _:
                        assert_never(arg)

        A type checker that finds a reachable call to ``assert_never()``
        emits an error.

        At runtime, calling this function always raises AssertionError.
        """
        raise AssertionError("Expected code to be unreachable")
if sys.version_info >= (3, 12):
    # dataclass_transform exists in 3.11 but lacks the frozen_default parameter
    dataclass_transform = typing.dataclass_transform
else:
    def dataclass_transform(
        *,
        eq_default: bool = True,
        order_default: bool = False,
        kw_only_default: bool = False,
        frozen_default: bool = False,
        field_specifiers: typing.Tuple[
            typing.Union[typing.Type[typing.Any], typing.Callable[..., typing.Any]],
            ...
        ] = (),
        **kwargs: typing.Any,
    ) -> typing.Callable[[T], T]:
        """Mark a function, class, or metaclass as providing dataclass-like behavior.

        Example:

            from metaflow._vendor.v3_7.typing_extensions import dataclass_transform

            _T = TypeVar("_T")

            # Used on a decorator function
            @dataclass_transform()
            def create_model(cls: type[_T]) -> type[_T]:
                ...
                return cls

            @create_model
            class CustomerModel:
                id: int
                name: str

            # Used on a base class
            @dataclass_transform()
            class ModelBase: ...

            class CustomerModel(ModelBase):
                id: int
                name: str

            # Used on a metaclass
            @dataclass_transform()
            class ModelMeta(type): ...

            class ModelBase(metaclass=ModelMeta): ...

            class CustomerModel(ModelBase):
                id: int
                name: str

        Each ``CustomerModel`` class above is treated by type checkers much
        like a dataclass created with ``@dataclasses.dataclass``; for example,
        the type checker synthesizes an ``__init__`` method.

        The arguments customize that behavior:

        - ``eq_default``: value assumed for ``eq`` when the caller omits it.
        - ``order_default``: value assumed for ``order`` when omitted.
        - ``kw_only_default``: value assumed for ``kw_only`` when omitted.
        - ``frozen_default``: value assumed for ``frozen`` when omitted.
        - ``field_specifiers``: static list of supported classes or functions
          that describe fields, similar to ``dataclasses.field()``.

        At runtime, the decorator records its arguments in the
        ``__dataclass_transform__`` attribute of the decorated object and
        returns that object.

        See PEP 681 for details.
        """
        def decorator(cls_or_fn):
            # A fresh dict per decorated object, so objects never share state.
            cls_or_fn.__dataclass_transform__ = dict(
                eq_default=eq_default,
                order_default=order_default,
                kw_only_default=kw_only_default,
                frozen_default=frozen_default,
                field_specifiers=field_specifiers,
                kwargs=kwargs,
            )
            return cls_or_fn
        return decorator
# Reuse typing.override when the running Python provides it.
if hasattr(typing, "override"):
    override = typing.override
else:
    _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any])

    def override(__arg: _F) -> _F:
        """Indicate that a method is intended to override a base class method.

        Usage:

            class Base:
                def method(self) -> None:
                    pass

            class Child(Base):
                @override
                def method(self) -> None:
                    super().method()

        When this decorator is applied to a method, the type checker
        validates that it overrides a method with the same name on a base
        class. This helps prevent bugs that may occur when a base class is
        changed without an equivalent change to a child class.

        There is no runtime checking of these properties. The decorator sets
        the ``__override__`` attribute to ``True`` on the decorated object to
        allow runtime introspection, and returns the object unchanged.

        See PEP 698 for details.
        """
        try:
            setattr(__arg, "__override__", True)
        except (AttributeError, TypeError):
            # The attribute is skipped silently when it is not writable:
            # AttributeError for objects with __slots__ or a read-only
            # property, TypeError for builtin classes.
            pass
        return __arg
if hasattr(typing, "deprecated"):
    # Reuse the implementation from ``typing`` when it exposes one.
    # NOTE(review): PEP 702's stdlib implementation appears to live in
    # ``warnings`` rather than ``typing`` -- confirm whether this branch can
    # ever be taken.
    deprecated = typing.deprecated
else:
    _T = typing.TypeVar("_T")

    def deprecated(
        __msg: str,
        *,
        category: typing.Optional[typing.Type[Warning]] = DeprecationWarning,
        stacklevel: int = 1,
    ) -> typing.Callable[[_T], _T]:
        """Indicate that a class, function or overload is deprecated.

        Usage:

            @deprecated("Use B instead")
            class A:
                pass

            @deprecated("Use g instead")
            def f():
                pass

            @overload
            @deprecated("int support is deprecated")
            def g(x: int) -> int: ...
            @overload
            def g(x: str) -> int: ...

        When this decorator is applied to an object, the type checker
        will generate a diagnostic on usage of the deprecated object.

        The warning specified by ``category`` will be emitted on use
        of deprecated objects. For functions, that happens on calls;
        for classes, on instantiation. If the ``category`` is ``None``,
        no warning is emitted. The ``stacklevel`` determines where the
        warning is emitted. If it is ``1`` (the default), the warning
        is emitted at the direct caller of the deprecated object; if it
        is higher, it is emitted further up the stack.

        The decorator sets the ``__deprecated__``
        attribute on the decorated object to the deprecation message
        passed to the decorator. If applied to an overload, the decorator
        must be after the ``@overload`` decorator for the attribute to
        exist on the overload as returned by ``get_overloads()``.

        Raises ``TypeError`` (at decoration time) when ``category`` is not
        ``None`` and the decorated object is neither a class nor callable.

        See PEP 702 for details.

        """
        def decorator(__arg: _T) -> _T:
            if category is None:
                # No runtime warning requested: only record the message.
                __arg.__deprecated__ = __msg
                return __arg
            elif isinstance(__arg, type):
                # Classes are modified in place: __new__ is replaced by a
                # warning wrapper and the class object itself is returned.
                original_new = __arg.__new__
                has_init = __arg.__init__ is not object.__init__

                @functools.wraps(original_new)
                def __new__(cls, *args, **kwargs):
                    # +1 skips this wrapper's own frame.
                    warnings.warn(__msg, category=category, stacklevel=stacklevel + 1)
                    if original_new is not object.__new__:
                        return original_new(cls, *args, **kwargs)
                    # Mirrors a similar check in object.__new__.
                    elif not has_init and (args or kwargs):
                        raise TypeError(f"{cls.__name__}() takes no arguments")
                    else:
                        return original_new(cls)

                __arg.__new__ = staticmethod(__new__)
                __arg.__deprecated__ = __new__.__deprecated__ = __msg
                return __arg
            elif callable(__arg):
                # Other callables are wrapped; the wrapper is returned and
                # both it and the original carry ``__deprecated__``.
                @functools.wraps(__arg)
                def wrapper(*args, **kwargs):
                    # +1 skips this wrapper's own frame.
                    warnings.warn(__msg, category=category, stacklevel=stacklevel + 1)
                    return __arg(*args, **kwargs)

                __arg.__deprecated__ = wrapper.__deprecated__ = __msg
                return wrapper
            else:
                raise TypeError(
                    "@deprecated decorator with non-None category must be applied to "
                    f"a class or callable, not {__arg!r}"
                )

        return decorator
# We have to do some monkey patching to deal with the dual nature of
# Unpack/TypeVarTuple:
# - We want Unpack to be a kind of TypeVar so it gets accepted in
#   Generic[Unpack[Ts]]
# - We want it to *not* be treated as a TypeVar for the purposes of
#   counting generic parameters, so that when we subscript a generic,
#   the runtime doesn't try to substitute the Unpack with the subscripted type.
if not hasattr(typing, "TypeVarTuple"):
    # Applied only when ``typing`` has no TypeVarTuple of its own: the two
    # private ``typing`` helpers are replaced by this module's versions.
    typing._collect_type_vars = _collect_type_vars
    typing._check_generic = _check_generic
# Backport typing.NamedTuple as it exists in Python 3.12.
# In 3.11, the ability to define generic `NamedTuple`s was supported.
# This was explicitly disallowed in 3.9-3.10, and only half-worked in <=3.8.
# On 3.12, we added __orig_bases__ to call-based NamedTuples
# On 3.13, we deprecated kwargs-based NamedTuples
if sys.version_info >= (3, 13):
    NamedTuple = typing.NamedTuple
else:
    def _make_nmtuple(name, types, module, defaults=()):
        # Build a collections.namedtuple class from (field name, type) pairs
        # and attach the checked annotations to both the class and its __new__.
        fields = [n for n, t in types]
        annotations = {n: typing._type_check(t, f"field {n} annotation must be a type")
                       for n, t in types}
        nm_tpl = collections.namedtuple(name, fields,
                                        defaults=defaults, module=module)
        nm_tpl.__annotations__ = nm_tpl.__new__.__annotations__ = annotations
        # The `_field_types` attribute was removed in 3.9;
        # in earlier versions, it is the same as the `__annotations__` attribute
        if sys.version_info < (3, 9):
            nm_tpl._field_types = annotations
        return nm_tpl

    _prohibited_namedtuple_fields = typing._prohibited
    _special_namedtuple_fields = frozenset({'__module__', '__name__', '__annotations__'})

    class _NamedTupleMeta(type):
        # Metaclass behind the class-based syntax ``class X(NamedTuple): ...``.
        def __new__(cls, typename, bases, ns):
            assert _NamedTuple in bases
            for base in bases:
                if base is not _NamedTuple and base is not typing.Generic:
                    raise TypeError(
                        'can only inherit from a NamedTuple type and Generic')
            # The placeholder base is swapped for the real ``tuple`` base.
            bases = tuple(tuple if base is _NamedTuple else base for base in bases)
            types = ns.get('__annotations__', {})
            # An annotated name that also has a value in the class body is a
            # field with a default; defaults may not precede non-defaults.
            default_names = []
            for field_name in types:
                if field_name in ns:
                    default_names.append(field_name)
                elif default_names:
                    raise TypeError(f"Non-default namedtuple field {field_name} "
                                    f"cannot follow default field"
                                    f"{'s' if len(default_names) > 1 else ''} "
                                    f"{', '.join(default_names)}")
            nm_tpl = _make_nmtuple(
                typename, types.items(),
                defaults=[ns[n] for n in default_names],
                module=ns['__module__']
            )
            nm_tpl.__bases__ = bases
            if typing.Generic in bases:
                if hasattr(typing, '_generic_class_getitem'):  # 3.12+
                    nm_tpl.__class_getitem__ = classmethod(typing._generic_class_getitem)
                else:
                    class_getitem = typing.Generic.__class_getitem__.__func__
                    nm_tpl.__class_getitem__ = classmethod(class_getitem)
            # update from user namespace without overriding special namedtuple attributes
            for key in ns:
                if key in _prohibited_namedtuple_fields:
                    raise AttributeError("Cannot overwrite NamedTuple attribute " + key)
                elif key not in _special_namedtuple_fields and key not in nm_tpl._fields:
                    setattr(nm_tpl, key, ns[key])
            if typing.Generic in bases:
                nm_tpl.__init_subclass__()
            return nm_tpl

    # Placeholder base class created directly with ``type.__new__`` so that
    # ``_NamedTupleMeta.__new__`` (and its assertions) is not run for it.
    _NamedTuple = type.__new__(_NamedTupleMeta, 'NamedTuple', (), {})

    def _namedtuple_mro_entries(bases):
        # Substitutes the placeholder class when ``NamedTuple`` is listed as
        # a base.
        assert NamedTuple in bases
        return (_NamedTuple,)

    @_ensure_subclassable(_namedtuple_mro_entries)
    def NamedTuple(__typename, __fields=_marker, **kwargs):
        """Typed version of namedtuple.

        Usage::

            class Employee(NamedTuple):
                name: str
                id: int

        This is equivalent to::

            Employee = collections.namedtuple('Employee', ['name', 'id'])

        The resulting class has an extra __annotations__ attribute, giving a
        dict that maps field names to types.  (The field names are also in
        the _fields attribute, which is part of the namedtuple API.)
        An alternative equivalent functional syntax is also accepted::

            Employee = NamedTuple('Employee', [('name', str), ('id', int)])

        Omitting the fields, passing ``None`` for them, or supplying them as
        keyword arguments still works but emits a ``DeprecationWarning``.
        Passing both a fields value and keyword arguments raises
        ``TypeError``.
        """
        if __fields is _marker:
            if kwargs:
                deprecated_thing = "Creating NamedTuple classes using keyword arguments"
                deprecation_msg = (
                    "{name} is deprecated and will be disallowed in Python {remove}. "
                    "Use the class-based or functional syntax instead."
                )
            else:
                deprecated_thing = "Failing to pass a value for the 'fields' parameter"
                example = f"`{__typename} = NamedTuple({__typename!r}, [])`"
                deprecation_msg = (
                    "{name} is deprecated and will be disallowed in Python {remove}. "
                    "To create a NamedTuple class with 0 fields "
                    "using the functional syntax, "
                    "pass an empty list, e.g. "
                ) + example + "."
        elif __fields is None:
            if kwargs:
                raise TypeError(
                    "Cannot pass `None` as the 'fields' parameter "
                    "and also specify fields using keyword arguments"
                )
            else:
                deprecated_thing = "Passing `None` as the 'fields' parameter"
                example = f"`{__typename} = NamedTuple({__typename!r}, [])`"
                deprecation_msg = (
                    "{name} is deprecated and will be disallowed in Python {remove}. "
                    "To create a NamedTuple class with 0 fields "
                    "using the functional syntax, "
                    "pass an empty list, e.g. "
                ) + example + "."
        elif kwargs:
            raise TypeError("Either list of fields or keywords"
                            " can be provided to NamedTuple, not both")
        if __fields is _marker or __fields is None:
            # Every deprecated calling form ends up here; the fields are then
            # taken from the keyword arguments (possibly none).
            warnings.warn(
                deprecation_msg.format(name=deprecated_thing, remove="3.15"),
                DeprecationWarning,
                stacklevel=2,
            )
            __fields = kwargs.items()
        nt = _make_nmtuple(__typename, __fields, module=_caller())
        nt.__orig_bases__ = (NamedTuple,)
        return nt

    # On 3.8+, alter the signature so that it matches typing.NamedTuple.
    # The signature of typing.NamedTuple on >=3.8 is invalid syntax in Python 3.7,
    # so just leave the signature as it is on 3.7.
    if sys.version_info >= (3, 8):
        _new_signature = '(typename, fields=None, /, **kwargs)'
        if isinstance(NamedTuple, _types.FunctionType):
            NamedTuple.__text_signature__ = _new_signature
        else:
            NamedTuple.__call__.__text_signature__ = _new_signature
if not hasattr(collections.abc, "Buffer"):
    class Buffer(abc.ABC):
        """Marker ABC for classes that implement the buffer protocol.

        The buffer protocol lets Python objects expose a low-level memory
        buffer. Before Python 3.12 there is no way to implement that
        protocol in pure Python, nor to check whether a class implements
        it. From Python 3.12 on, the ``__buffer__`` method gives Python
        code access to the protocol and ``collections.abc.Buffer`` can be
        used to test for support.

        On earlier versions, declare support by inheriting from this ABC
        (in a stub file or at runtime) or by registering the class with it.
        The ABC defines no methods, because there are no Python-accessible
        methods shared by pre-3.12 buffer classes; it is mainly useful for
        static checks.
        """

    # As a courtesy, the most common stdlib buffer classes are registered
    # up front.
    Buffer.register(memoryview)
    Buffer.register(bytearray)
    Buffer.register(bytes)
else:
    # The interpreter already ships the ABC; re-export it unchanged.
    Buffer = collections.abc.Buffer
# Backport of types.get_original_bases, available on 3.12+ in CPython
if hasattr(_types, "get_original_bases"):
    get_original_bases = _types.get_original_bases
else:
    def get_original_bases(__cls):
        """Return the class's "original" bases prior to modification by `__mro_entries__`.

        Examples::

            from typing import TypeVar, Generic
            from metaflow._vendor.v3_7.typing_extensions import NamedTuple, TypedDict

            T = TypeVar("T")
            class Foo(Generic[T]): ...
            class Bar(Foo[int], float): ...
            class Baz(list[str]): ...
            class Qux(Bar): ...
            Eggs = NamedTuple("Eggs", [("a", int), ("b", str)])
            Spam = TypedDict("Spam", {"a": int, "b": str})

            assert get_original_bases(Bar) == (Foo[int], float)
            assert get_original_bases(Baz) == (list[str],)
            assert get_original_bases(Qux) == (Bar,)
            assert get_original_bases(Eggs) == (NamedTuple,)
            assert get_original_bases(Spam) == (TypedDict,)
            assert get_original_bases(int) == (object,)

        Raises ``TypeError`` if the argument does not expose both
        ``__dict__`` and ``__bases__``, i.e. it is not a class.
        """
        try:
            # Look only in the class's *own* namespace. A plain attribute
            # lookup would also find an ``__orig_bases__`` inherited from a
            # parent class, so ``class Qux(Bar)`` would wrongly report Bar's
            # original bases instead of ``(Bar,)``.
            return __cls.__dict__.get("__orig_bases__", __cls.__bases__)
        except AttributeError:
            raise TypeError(
                f'Expected an instance of type, not {type(__cls).__name__!r}'
            ) from None
# NewType is a class on Python 3.10+, making it pickleable
# The error message for subclassing instances of NewType was improved on 3.11+
if sys.version_info < (3, 11):
    class NewType:
        """Create a simple, unique type with almost no runtime overhead.

        Static type checkers treat ``NewType(name, tp)`` as a subtype of
        ``tp``. At runtime the resulting object is just a callable that
        hands its argument back unchanged. Usage::

            UserId = NewType('UserId', int)

            def name_by_id(user_id: UserId) -> str:
                ...

            UserId('user')          # Fails type check
            name_by_id(42)          # Fails type check
            name_by_id(UserId(42))  # OK
            num = UserId(5) + 1     # type: int
        """

        def __init__(self, name, tp):
            self.__qualname__ = name
            # Only the last dotted component becomes the short name.
            self.__name__ = name.rpartition('.')[-1] if '.' in name else name
            self.__supertype__ = tp
            defining_module = _caller()
            if defining_module != 'typing_extensions':
                self.__module__ = defining_module

        def __call__(self, obj):
            return obj

        def __repr__(self):
            return f'{self.__module__}.{self.__qualname__}'

        def __reduce__(self):
            return self.__qualname__

        def __mro_entries__(self, bases):
            # Defined so that attempting to subclass a NewType instance
            # produces a helpful error message. bpo-46170
            base_name = self.__name__

            class Dummy:
                def __init_subclass__(cls):
                    derived_name = cls.__name__
                    raise TypeError(
                        f"Cannot subclass an instance of NewType. "
                        f"Perhaps you were looking for: "
                        f"`{derived_name} = NewType({derived_name!r}, {base_name})`"
                    )

            return (Dummy,)

        if sys.version_info >= (3, 10):
            # PEP 604 methods
            # It doesn't make sense to have these methods on Python <3.10

            def __or__(self, other):
                return typing.Union[self, other]

            def __ror__(self, other):
                return typing.Union[other, self]
else:
    NewType = typing.NewType
if hasattr(typing, "TypeAliasType"):
    TypeAliasType = typing.TypeAliasType
else:
    def _is_unionable(obj):
        """Corresponds to is_unionable() in unionobject.c in CPython."""
        return obj is None or isinstance(obj, (
            type,
            _types.GenericAlias,
            _types.UnionType,
            TypeAliasType,
        ))

    class TypeAliasType:
        """Create named, parameterized type aliases.

        This provides a backport of the new `type` statement in Python 3.12:

            type ListOrSet[T] = list[T] | set[T]

        is equivalent to:

            T = TypeVar("T")
            ListOrSet = TypeAliasType("ListOrSet", list[T] | set[T], type_params=(T,))

        The name ListOrSet can then be used as an alias for the type it refers to.

        The type_params argument should contain all the type parameters used
        in the value of the type alias. If the alias is not generic, this
        argument is omitted.

        Static type checkers should only support type aliases declared using
        TypeAliasType that follow these rules:

        - The first argument (the name) must be a string literal.
        - The TypeAliasType instance must be immediately assigned to a variable
          of the same name. (For example, 'X = TypeAliasType("Y", int)' is invalid,
          as is 'X, Y = TypeAliasType("X", int), TypeAliasType("Y", int)').

        """

        def __init__(self, name: str, value, *, type_params=()):
            if not isinstance(name, str):
                raise TypeError("TypeAliasType name must be a string")
            self.__value__ = value
            self.__type_params__ = type_params

            # Flatten the type parameters: a TypeVarTuple contributes the
            # items it iterates over, anything else is kept as is.
            parameters = []
            for type_param in type_params:
                if isinstance(type_param, TypeVarTuple):
                    parameters.extend(type_param)
                else:
                    parameters.append(type_param)
            self.__parameters__ = tuple(parameters)
            def_mod = _caller()
            if def_mod != 'typing_extensions':
                self.__module__ = def_mod
            # Setting this attribute closes the TypeAliasType from further modification
            self.__name__ = name

        def __setattr__(self, __name: str, __value: object) -> None:
            # Once ``__name__`` exists (the last assignment in __init__), every
            # further attribute assignment is rejected.
            if hasattr(self, "__name__"):
                self._raise_attribute_error(__name)
            super().__setattr__(__name, __value)

        def __delattr__(self, __name: str) -> Never:
            # Deleting attributes is never allowed.
            self._raise_attribute_error(__name)

        def _raise_attribute_error(self, name: str) -> Never:
            # Match the Python 3.12 error messages exactly
            if name == "__name__":
                raise AttributeError("readonly attribute")
            elif name in {"__value__", "__type_params__", "__parameters__", "__module__"}:
                raise AttributeError(
                    f"attribute '{name}' of 'typing.TypeAliasType' objects "
                    "is not writable"
                )
            else:
                raise AttributeError(
                    f"'typing.TypeAliasType' object has no attribute '{name}'"
                )

        def __repr__(self) -> str:
            return self.__name__

        def __getitem__(self, parameters):
            # Subscription: each argument is checked with typing._type_check
            # and the alias is wrapped in a typing._GenericAlias.
            if not isinstance(parameters, tuple):
                parameters = (parameters,)
            parameters = [
                typing._type_check(
                    item, f'Subscripting {self.__name__} requires a type.'
                )
                for item in parameters
            ]
            return typing._GenericAlias(self, tuple(parameters))

        def __reduce__(self):
            return self.__name__

        def __init_subclass__(cls, *args, **kwargs):
            # Subclassing is rejected outright.
            raise TypeError(
                "type 'typing_extensions.TypeAliasType' is not an acceptable base type"
            )

        # The presence of this method convinces typing._type_check
        # that TypeAliasTypes are types.
        def __call__(self):
            raise TypeError("Type alias is not callable")

        if sys.version_info >= (3, 10):
            def __or__(self, right):
                # For forward compatibility with 3.12, reject Unions
                # that are not accepted by the built-in Union.
                if not _is_unionable(right):
                    return NotImplemented
                return typing.Union[self, right]

            def __ror__(self, left):
                if not _is_unionable(left):
                    return NotImplemented
                return typing.Union[left, self]
if not hasattr(typing, "is_protocol"):
    def is_protocol(__tp: type) -> bool:
        """Tell whether the given type is a Protocol.

        Example::

            >>> from typing_extensions import Protocol, is_protocol
            >>> class P(Protocol):
            ...     def a(self) -> str: ...
            ...     b: int
            >>> is_protocol(P)
            True
            >>> is_protocol(int)
            False
        """
        if not isinstance(__tp, type):
            return False
        protocol_flag = getattr(__tp, '_is_protocol', False)
        if not protocol_flag:
            return protocol_flag
        # The Protocol base classes themselves do not count as protocols.
        if __tp is Protocol:
            return False
        return __tp is not getattr(typing, "Protocol", object())

    def get_protocol_members(__tp: type) -> typing.FrozenSet[str]:
        """Return the names of the members defined by a Protocol.

        Example::

            >>> from typing_extensions import Protocol, get_protocol_members
            >>> class P(Protocol):
            ...     def a(self) -> str: ...
            ...     b: int
            >>> get_protocol_members(P)
            frozenset({'a', 'b'})

        A TypeError is raised for arguments that are not Protocols.
        """
        if not is_protocol(__tp):
            raise TypeError(f'{__tp!r} is not a Protocol')
        # Use the attribute set stored on the class when there is one,
        # otherwise compute it.
        if hasattr(__tp, '__protocol_attrs__'):
            member_names = __tp.__protocol_attrs__
        else:
            member_names = _get_protocol_attrs(__tp)
        return frozenset(member_names)
else:
    is_protocol = typing.is_protocol
    get_protocol_members = typing.get_protocol_members
# Aliases for items that have always been in typing.
# Explicitly assign these (rather than using `from typing import *` at the top),
# so that we get a CI error if one of these is deleted from typing.py
# in a future version of Python.
# Each name below is the same object as its ``typing`` counterpart.
AbstractSet = typing.AbstractSet
AnyStr = typing.AnyStr
BinaryIO = typing.BinaryIO
Callable = typing.Callable
Collection = typing.Collection
Container = typing.Container
Dict = typing.Dict
ForwardRef = typing.ForwardRef
FrozenSet = typing.FrozenSet
Generator = typing.Generator
Generic = typing.Generic
Hashable = typing.Hashable
IO = typing.IO
ItemsView = typing.ItemsView
Iterable = typing.Iterable
Iterator = typing.Iterator
KeysView = typing.KeysView
List = typing.List
Mapping = typing.Mapping
MappingView = typing.MappingView
Match = typing.Match
MutableMapping = typing.MutableMapping
MutableSequence = typing.MutableSequence
MutableSet = typing.MutableSet
Optional = typing.Optional
Pattern = typing.Pattern
Reversible = typing.Reversible
Sequence = typing.Sequence
Set = typing.Set
Sized = typing.Sized
TextIO = typing.TextIO
Tuple = typing.Tuple
Union = typing.Union
ValuesView = typing.ValuesView
cast = typing.cast
no_type_check = typing.no_type_check
no_type_check_decorator = typing.no_type_check_decorator
================================================
FILE: metaflow/_vendor/v3_7/zipp.LICENSE
================================================
Copyright Jason R. Coombs
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
================================================
FILE: metaflow/_vendor/v3_7/zipp.py
================================================
import io
import posixpath
import zipfile
import itertools
import contextlib
import sys
import pathlib
if sys.version_info < (3, 7):
from collections import OrderedDict
else:
OrderedDict = dict
__all__ = ['Path']
def _parents(path):
    """
    Yield every proper ancestor of ``path``, nearest first, where path
    elements are separated by posixpath.sep.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    # The first item of the ancestry is the path itself, so drop it.
    lineage = _ancestry(path)
    return itertools.islice(lineage, 1, None)
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
>>> list(_ancestry('b/d'))
['b/d', 'b']
>>> list(_ancestry('/b/d/'))
['/b/d', '/b']
>>> list(_ancestry('b/d/f/'))
['b/d/f', 'b/d', 'b']
>>> list(_ancestry('b'))
['b']
>>> list(_ancestry(''))
[]
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
yield path
path, tail = posixpath.split(path)
# ``_dedupe(iterable)`` builds a mapping keyed by the iterable's items, so
# each distinct item is kept once, in first-seen order.
_dedupe = OrderedDict.fromkeys
"""Deduplicate an iterable in original order"""
def _difference(minuend, subtrahend):
"""
Return items in minuend not in subtrahend, retaining order
with O(1) lookup.
"""
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
class CompleteDirs(zipfile.ZipFile):
    """
    ZipFile subclass whose namelist always contains the directories that
    are only implied by the paths of the archive's entries.
    """

    @staticmethod
    def _implied_dirs(names):
        # Every ancestor of every name, spelled as a directory entry.
        ancestor_dirs = (
            ancestor + posixpath.sep
            for name in names
            for ancestor in _parents(name)
        )
        # Keep only the ones not already listed, once each, in order.
        return _dedupe(_difference(ancestor_dirs, names))

    def namelist(self):
        listed = super().namelist()
        return listed + list(self._implied_dirs(listed))

    def _name_set(self):
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        Return ``name`` with a trailing slash when it is only known as a
        directory; otherwise return ``name`` unchanged.
        """
        known = self._name_set()
        as_directory = name + '/'
        if name not in known and as_directory in known:
            return as_directory
        return name

    @classmethod
    def make(cls, source):
        """
        Turn a source (filename or zipfile) into an instance of an
        appropriate CompleteDirs subclass.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, zipfile.ZipFile):
            return cls(_pathlib_compat(source))

        # Only allow for FastLookup when supplied zipfile is read-only;
        # the existing object is re-classed in place rather than copied.
        source.__class__ = cls if 'r' in source.mode else CompleteDirs
        return source
class FastLookup(CompleteDirs):
    """
    CompleteDirs variant that computes the name list and the name set
    once and reuses the stored results on later calls.
    """

    def namelist(self):
        try:
            return self.__names
        except AttributeError:
            # Nothing stored yet; compute and remember below.
            pass
        self.__names = super().namelist()
        return self.__names

    def _name_set(self):
        try:
            return self.__lookup
        except AttributeError:
            # Nothing stored yet; compute and remember below.
            pass
        self.__lookup = super()._name_set()
        return self.__lookup
def _pathlib_compat(path):
"""
For path-like objects, convert to a filename for compatibility
on Python 3.6.1 and earlier.
"""
try:
return path.__fspath__()
except AttributeError:
return str(path)
class Path:
    """
    A pathlib-compatible interface for zip files.

    Take a zip file laid out like this::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = zipfile.ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'mem/abcde.zip'

    A Path can be built from the zipfile object itself or from a filename:

    >>> root = Path(zf)

    Several path operations are then available.

    Iterating a directory (the zip file itself counts as one):

    >>> a, b = root.iterdir()
    >>> a
    Path('mem/abcde.zip', 'a.txt')
    >>> b
    Path('mem/abcde.zip', 'b/')

    The name property:

    >>> b.name
    'b'

    Joining with the divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('mem/abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Reading text:

    >>> c.read_text()
    'content of c'

    Testing existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Converting to a string:

    >>> import os
    >>> str(c).replace(os.sep, posixpath.sep)
    'mem/abcde.zip/b/c.txt'

    At the root, ``name``, ``filename``, and ``parent``
    resolve to the zipfile. Note these attributes are not
    valid and will raise a ``ValueError`` if the zipfile
    has no filename.

    >>> root.name
    'abcde.zip'
    >>> str(root.filename).replace(os.sep, posixpath.sep)
    'mem/abcde.zip'
    >>> str(root.parent)
    'mem'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Build a Path from a ZipFile or a filename.

        Note: when ``root`` is an existing ZipFile object, its type
        (__class__) is mutated to a specialized type. Callers that need
        the original type preserved should pass a separate ZipFile
        object or a filename instead.
        """
        self.root = FastLookup.make(root)
        self.at = at

    def open(self, mode='r', *args, pwd=None, **kwargs):
        """
        Open this entry as text or binary, following the semantics of
        ``pathlib.Path.open()``; extra arguments are passed through to
        io.TextIOWrapper().
        """
        if self.is_dir():
            raise IsADirectoryError(self)
        zip_mode = mode[0]
        if not self.exists() and zip_mode == 'r':
            raise FileNotFoundError(self)
        wants_binary = 'b' in mode
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        if not wants_binary:
            return io.TextIOWrapper(stream, *args, **kwargs)
        # Binary streams are returned raw, so text options make no sense.
        if args or kwargs:
            raise ValueError("encoding args invalid for binary operation")
        return stream

    # For the four properties below, the entry's own value wins; when it
    # is empty the value derived from ``filename`` is used instead.

    @property
    def name(self):
        inner = pathlib.Path(self.at)
        return inner.name or self.filename.name

    @property
    def suffix(self):
        inner = pathlib.Path(self.at)
        return inner.suffix or self.filename.suffix

    @property
    def suffixes(self):
        inner = pathlib.Path(self.at)
        return inner.suffixes or self.filename.suffixes

    @property
    def stem(self):
        inner = pathlib.Path(self.at)
        return inner.stem or self.filename.stem

    @property
    def filename(self):
        archive = pathlib.Path(self.root.filename)
        return archive.joinpath(self.at)

    def read_text(self, *args, **kwargs):
        with self.open('r', *args, **kwargs) as handle:
            return handle.read()

    def read_bytes(self):
        with self.open('rb') as handle:
            return handle.read()

    def _is_child(self, path):
        # ``path`` is a direct child when its parent directory is this entry.
        candidate_parent = posixpath.dirname(path.at.rstrip("/"))
        return candidate_parent == self.at.rstrip("/")

    def _next(self, at):
        return self.__class__(self.root, at)

    def is_dir(self):
        # The root (empty ``at``) is a directory, as is any entry whose
        # name ends with a slash.
        return self.at.endswith("/") if self.at else True

    def is_file(self):
        if not self.exists():
            return False
        return not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        candidates = map(self._next, self.root.namelist())
        return filter(self._is_child, candidates)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, *other):
        joined = posixpath.join(self.at, *map(_pathlib_compat, other))
        return self._next(self.root.resolve_dir(joined))

    __truediv__ = joinpath

    @property
    def parent(self):
        if not self.at:
            return self.filename.parent
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        # Directory entries are spelled with a trailing slash; the root
        # stays the empty string.
        return self._next(parent_at + '/' if parent_at else parent_at)
================================================
FILE: metaflow/_vendor/vendor_any.txt
================================================
click==7.1.2
packaging==23.0
importlib_metadata==4.8.3
typeguard==4.4.0
typing_extensions==4.12.2
zipp==3.6.0
standard-imghdr==3.13.0
pyyaml==5.3.1
================================================
FILE: metaflow/_vendor/vendor_v3_6.txt
================================================
importlib_metadata==4.8.3
typing_extensions==4.1.1
zipp==3.6.0
================================================
FILE: metaflow/_vendor/vendor_v3_7.txt
================================================
importlib_metadata==4.8.3
typeguard==4.1.2
typing_extensions==4.7.1
zipp==3.6.0
================================================
FILE: metaflow/_vendor/yaml/__init__.py
================================================
from .error import *
from .tokens import *
from .events import *
from .nodes import *
from .loader import *
from .dumper import *
__version__ = '5.3.1'
try:
from .cyaml import *
__with_libyaml__ = True
except ImportError:
__with_libyaml__ = False
import io
#------------------------------------------------------------------------------
# Warnings control
#------------------------------------------------------------------------------
# 'Global' warnings state:
_warnings_enabled = {
'YAMLLoadWarning': True,
}
# Get or set global warnings' state
def warnings(settings=None):
if settings is None:
return _warnings_enabled
if type(settings) is dict:
for key in settings:
if key in _warnings_enabled:
_warnings_enabled[key] = settings[key]
# Category of the warning issued when load() is called without Loader=...
class YAMLLoadWarning(RuntimeWarning):
    """Warning category for calls that omit an explicit ``Loader``."""
def load_warning(method):
    """
    Emit a YAMLLoadWarning saying that ``yaml.<method>()`` was called
    without ``Loader=...``, unless that warning has been switched off.
    """
    if _warnings_enabled['YAMLLoadWarning'] is not False:
        # Imported locally: at module level the name ``warnings`` refers
        # to the settings function defined in this file.
        import warnings

        template = (
            "calling yaml.%s() without Loader=... is deprecated, as the "
            "default Loader is unsafe. Please read "
            "https://msg.pyyaml.org/load for full details."
        )
        warnings.warn(template % method, YAMLLoadWarning, stacklevel=3)
#------------------------------------------------------------------------------
def scan(stream, Loader=Loader):
    """
    Lazily yield the scanning tokens of a YAML stream.

    The loader built around ``stream`` is disposed of in a ``finally``
    clause once the generator finishes or is closed.
    """
    token_source = Loader(stream)
    try:
        while True:
            if not token_source.check_token():
                break
            yield token_source.get_token()
    finally:
        token_source.dispose()
def parse(stream, Loader=Loader):
    """
    Lazily yield the parsing events of a YAML stream.

    The loader built around ``stream`` is disposed of in a ``finally``
    clause once the generator finishes or is closed.
    """
    event_source = Loader(stream)
    try:
        while True:
            if not event_source.check_event():
                break
            yield event_source.get_event()
    finally:
        event_source.dispose()
def compose(stream, Loader=Loader):
    """
    Parse the first YAML document in a stream and return its
    representation tree. The loader is always disposed of afterwards.
    """
    composer = Loader(stream)
    try:
        root_node = composer.get_single_node()
    finally:
        composer.dispose()
    return root_node
def compose_all(stream, Loader=Loader):
    """
    Lazily yield the representation tree of every YAML document in a
    stream.

    The loader built around ``stream`` is disposed of in a ``finally``
    clause once the generator finishes or is closed.
    """
    composer = Loader(stream)
    try:
        while True:
            if not composer.check_node():
                break
            yield composer.get_node()
    finally:
        composer.dispose()
def load(stream, Loader=None):
    """
    Parse the first YAML document in a stream and return the
    corresponding Python object.

    When no ``Loader`` is given, a load warning is issued through
    ``load_warning`` and ``FullLoader`` is used. The loader is always
    disposed of afterwards.
    """
    if Loader is None:
        load_warning('load')
        Loader = FullLoader

    constructor = Loader(stream)
    try:
        document = constructor.get_single_data()
    finally:
        constructor.dispose()
    return document
def load_all(stream, Loader=None):
    """
    Lazily yield the Python object for every YAML document in a stream.

    When no ``Loader`` is given, a load warning is issued through
    ``load_warning`` and ``FullLoader`` is used. The loader is disposed
    of in a ``finally`` clause once the generator finishes or is closed.
    """
    if Loader is None:
        load_warning('load_all')
        Loader = FullLoader

    constructor = Loader(stream)
    try:
        while True:
            if not constructor.check_data():
                break
            yield constructor.get_data()
    finally:
        constructor.dispose()
def full_load(stream):
    """
    Parse the first YAML document in a stream and return the
    corresponding Python object, using ``FullLoader``.

    All tags are resolved except those known to be
    unsafe on untrusted input.
    """
    return load(stream, Loader=FullLoader)
def full_load_all(stream):
    """
    Parse every YAML document in a stream and produce the
    corresponding Python objects, using ``FullLoader``.

    All tags are resolved except those known to be
    unsafe on untrusted input.
    """
    return load_all(stream, Loader=FullLoader)
def safe_load(stream):
    """
    Parse the first YAML document in a stream and return the
    corresponding Python object, using ``SafeLoader``.

    Only basic YAML tags are resolved. This is known
    to be safe for untrusted input.
    """
    return load(stream, Loader=SafeLoader)
def safe_load_all(stream):
    """
    Parse every YAML document in a stream and produce the
    corresponding Python objects, using ``SafeLoader``.

    Only basic YAML tags are resolved. This is known
    to be safe for untrusted input.
    """
    return load_all(stream, Loader=SafeLoader)
def unsafe_load(stream):
    """
    Parse the first YAML document in a stream and return the
    corresponding Python object, using ``UnsafeLoader``.

    All tags are resolved, even those known to be
    unsafe on untrusted input.
    """
    return load(stream, Loader=UnsafeLoader)
def unsafe_load_all(stream):
    """
    Parse every YAML document in a stream and produce the
    corresponding Python objects, using ``UnsafeLoader``.

    All tags are resolved, even those known to be
    unsafe on untrusted input.
    """
    return load_all(stream, Loader=UnsafeLoader)
def emit(events, stream=None, Dumper=Dumper,
        canonical=None, indent=None, width=None,
        allow_unicode=None, line_break=None):
    """
    Write YAML parsing events to a stream.
    When ``stream`` is None, the output is collected in memory and
    returned as a string instead.
    """
    collect_output = stream is None
    if collect_output:
        stream = io.StringIO()
    emitter = Dumper(stream, canonical=canonical, indent=indent, width=width,
            allow_unicode=allow_unicode, line_break=line_break)
    try:
        for event in events:
            emitter.emit(event)
    finally:
        emitter.dispose()
    if collect_output:
        return stream.getvalue()
def serialize_all(nodes, stream=None, Dumper=Dumper,
        canonical=None, indent=None, width=None,
        allow_unicode=None, line_break=None,
        encoding=None, explicit_start=None, explicit_end=None,
        version=None, tags=None):
    """
    Write a sequence of representation trees to a YAML stream.
    When ``stream`` is None, the output is collected in memory and
    returned instead (text when ``encoding`` is None, bytes otherwise).
    """
    collect_output = stream is None
    if collect_output:
        stream = io.BytesIO() if encoding is not None else io.StringIO()
    serializer = Dumper(stream, canonical=canonical, indent=indent, width=width,
            allow_unicode=allow_unicode, line_break=line_break,
            encoding=encoding, version=version, tags=tags,
            explicit_start=explicit_start, explicit_end=explicit_end)
    try:
        serializer.open()
        for node in nodes:
            serializer.serialize(node)
        serializer.close()
    finally:
        serializer.dispose()
    if collect_output:
        return stream.getvalue()
def serialize(node, stream=None, Dumper=Dumper, **kwds):
    """
    Write a single representation tree to ``stream`` as YAML.

    Thin wrapper around ``serialize_all`` with a one-element list; when
    ``stream`` is None the produced output is returned instead.
    """
    single_tree = [node]
    return serialize_all(single_tree, stream, Dumper=Dumper, **kwds)
def dump_all(documents, stream=None, Dumper=Dumper,
        default_style=None, default_flow_style=False,
        canonical=None, indent=None, width=None,
        allow_unicode=None, line_break=None,
        encoding=None, explicit_start=None, explicit_end=None,
        version=None, tags=None, sort_keys=True):
    """
    Write a sequence of Python objects to ``stream`` as YAML documents.

    When ``stream`` is None the output is collected in memory and returned:
    a text buffer is used if ``encoding`` is None, a bytes buffer otherwise.
    """
    collect_output = stream is None
    if collect_output:
        stream = io.StringIO() if encoding is None else io.BytesIO()
    representer = Dumper(stream, default_style=default_style,
            default_flow_style=default_flow_style,
            canonical=canonical, indent=indent, width=width,
            allow_unicode=allow_unicode, line_break=line_break,
            encoding=encoding, version=version, tags=tags,
            explicit_start=explicit_start, explicit_end=explicit_end,
            sort_keys=sort_keys)
    try:
        representer.open()
        for document in documents:
            representer.represent(document)
        representer.close()
    finally:
        # Always release the dumper, even if dumping failed part-way.
        representer.dispose()
    if collect_output:
        return stream.getvalue()
def dump(data, stream=None, Dumper=Dumper, **kwds):
    """
    Write a single Python object to ``stream`` as a YAML document.

    Thin wrapper around ``dump_all`` with a one-element list; when
    ``stream`` is None the produced output is returned instead.
    """
    single_document = [data]
    return dump_all(single_document, stream, Dumper=Dumper, **kwds)
def safe_dump_all(documents, stream=None, **kwds):
    """
    Write a sequence of Python objects to ``stream`` using ``SafeDumper``.

    Only basic YAML tags are produced. When ``stream`` is None the produced
    output is returned instead.
    """
    dumper_cls = SafeDumper
    return dump_all(documents, stream, Dumper=dumper_cls, **kwds)
def safe_dump(data, stream=None, **kwds):
    """
    Write a single Python object to ``stream`` using ``SafeDumper``.

    Only basic YAML tags are produced. When ``stream`` is None the produced
    output is returned instead.
    """
    single_document = [data]
    return dump_all(single_document, stream, Dumper=SafeDumper, **kwds)
def add_implicit_resolver(tag, regexp, first=None,
        Loader=None, Dumper=Dumper):
    """
    Register an implicit scalar detector.

    A plain scalar whose value matches ``regexp`` is assigned ``tag``.
    ``first`` is a sequence of possible initial characters, or None.
    With ``Loader`` left as None the resolver is added to ``loader.Loader``,
    ``loader.FullLoader`` and ``loader.UnsafeLoader``; it is always added
    to ``Dumper`` as well.
    """
    if Loader is None:
        loader_classes = (loader.Loader, loader.FullLoader, loader.UnsafeLoader)
    else:
        loader_classes = (Loader,)
    for loader_cls in loader_classes:
        loader_cls.add_implicit_resolver(tag, regexp, first)
    Dumper.add_implicit_resolver(tag, regexp, first)
def add_path_resolver(tag, path, kind=None, Loader=None, Dumper=Dumper):
    """
    Register a path based resolver for ``tag``.

    ``path`` is a list of keys leading to a node in the representation
    tree; keys can be string values, integers, or None.
    With ``Loader`` left as None the resolver is added to ``loader.Loader``,
    ``loader.FullLoader`` and ``loader.UnsafeLoader``; it is always added
    to ``Dumper`` as well.
    """
    if Loader is None:
        loader_classes = (loader.Loader, loader.FullLoader, loader.UnsafeLoader)
    else:
        loader_classes = (Loader,)
    for loader_cls in loader_classes:
        loader_cls.add_path_resolver(tag, path, kind)
    Dumper.add_path_resolver(tag, path, kind)
def add_constructor(tag, constructor, Loader=None):
    """
    Register ``constructor`` for ``tag``.

    ``constructor`` is a function that accepts a Loader instance and a node
    object and produces the corresponding Python object.
    With ``Loader`` left as None the constructor is added to
    ``loader.Loader``, ``loader.FullLoader`` and ``loader.UnsafeLoader``;
    otherwise only to the given class.
    """
    if Loader is None:
        loader_classes = (loader.Loader, loader.FullLoader, loader.UnsafeLoader)
    else:
        loader_classes = (Loader,)
    for loader_cls in loader_classes:
        loader_cls.add_constructor(tag, constructor)
def add_multi_constructor(tag_prefix, multi_constructor, Loader=None):
    """
    Register ``multi_constructor`` for every tag starting with ``tag_prefix``.

    ``multi_constructor`` accepts a Loader instance, a tag suffix and a node
    object and produces the corresponding Python object.
    With ``Loader`` left as None it is added to ``loader.Loader``,
    ``loader.FullLoader`` and ``loader.UnsafeLoader``; otherwise only to
    the given class.
    """
    if Loader is None:
        loader_classes = (loader.Loader, loader.FullLoader, loader.UnsafeLoader)
    else:
        loader_classes = (Loader,)
    for loader_cls in loader_classes:
        loader_cls.add_multi_constructor(tag_prefix, multi_constructor)
def add_representer(data_type, representer, Dumper=Dumper):
    """
    Register ``representer`` for ``data_type`` on ``Dumper``.

    ``representer`` is a function accepting a Dumper instance and an
    instance of the given data type, and producing the corresponding
    representation node.
    """
    dumper_cls = Dumper
    dumper_cls.add_representer(data_type, representer)
def add_multi_representer(data_type, multi_representer, Dumper=Dumper):
    """
    Register ``multi_representer`` for ``data_type`` on ``Dumper``.

    ``multi_representer`` is a function accepting a Dumper instance and an
    instance of the given data type or subtype, and producing the
    corresponding representation node.
    """
    dumper_cls = Dumper
    dumper_cls.add_multi_representer(data_type, multi_representer)
class YAMLObjectMetaclass(type):
    """
    Metaclass for YAMLObject: registers each tagged class on creation.
    """

    def __init__(cls, name, bases, kwds):
        super().__init__(name, bases, kwds)
        # Only classes that declare their own non-None yaml_tag in the class
        # body are registered; an inherited tag does not count.
        if kwds.get('yaml_tag') is None:
            return
        loaders = cls.yaml_loader
        if not isinstance(loaders, list):
            loaders = [loaders]
        for loader_cls in loaders:
            loader_cls.add_constructor(cls.yaml_tag, cls.from_yaml)
        cls.yaml_dumper.add_representer(cls, cls.to_yaml)
class YAMLObject(metaclass=YAMLObjectMetaclass):
    """
    Base class for objects that can dump themselves to a YAML stream
    and load themselves from a YAML stream.
    """

    __slots__ = ()  # no direct instantiation, so allow immutable subclasses

    # Loader classes on which a subclass with a yaml_tag gets registered.
    yaml_loader = [Loader, FullLoader, UnsafeLoader]
    # Dumper class on which a subclass with a yaml_tag gets registered.
    yaml_dumper = Dumper

    # Subclasses set these; a None tag means "do not register".
    yaml_tag = None
    yaml_flow_style = None

    @classmethod
    def from_yaml(cls, loader, node):
        """
        Build an instance of ``cls`` from a representation node.
        """
        instance = loader.construct_yaml_object(node, cls)
        return instance

    @classmethod
    def to_yaml(cls, dumper, data):
        """
        Build the representation node for ``data``, tagged with ``cls.yaml_tag``.
        """
        style = cls.yaml_flow_style
        return dumper.represent_yaml_object(cls.yaml_tag, data, cls,
                flow_style=style)
================================================
FILE: metaflow/_vendor/yaml/composer.py
================================================
__all__ = ['Composer', 'ComposerError']
from .error import MarkedYAMLError
from .events import *
from .nodes import *
class ComposerError(MarkedYAMLError):
    """Error raised by Composer while building the node tree."""
class Composer:
    """
    Builds representation node trees out of a stream of parsing events.

    The methods call ``check_event``/``peek_event``/``get_event`` and
    ``resolve``/``descend_resolver``/``ascend_resolver`` on ``self``. None
    of these are defined in this class, so they are presumably provided by
    the classes it gets combined with.
    """

    def __init__(self):
        # Maps anchor name -> node for the document currently being composed.
        self.anchors = {}

    def check_node(self):
        """Return True if at least one more document is available."""
        # Drop the STREAM-START event.
        if self.check_event(StreamStartEvent):
            self.get_event()

        # Anything other than STREAM-END means another document follows.
        at_end = self.check_event(StreamEndEvent)
        return not at_end

    def get_node(self):
        """Return the root node of the next document, or None at stream end."""
        if self.check_event(StreamEndEvent):
            return None
        return self.compose_document()

    def get_single_node(self):
        """
        Return the root node of the only document in the stream.

        Returns None for an empty stream and raises ComposerError when a
        second document follows the first one.
        """
        # Drop the STREAM-START event.
        self.get_event()

        # Compose a document if the stream is not empty.
        root = None
        if not self.check_event(StreamEndEvent):
            root = self.compose_document()

        # Ensure that the stream contains no more documents.
        if not self.check_event(StreamEndEvent):
            extra = self.get_event()
            raise ComposerError("expected a single document in the stream",
                    root.start_mark, "but found another document",
                    extra.start_mark)

        # Drop the STREAM-END event.
        self.get_event()

        return root

    def compose_document(self):
        """Compose one document and return its root node."""
        # Drop the DOCUMENT-START event.
        self.get_event()

        # Compose the root node.
        root = self.compose_node(None, None)

        # Drop the DOCUMENT-END event.
        self.get_event()

        # Anchors are reset after every document.
        self.anchors = {}
        return root

    def compose_node(self, parent, index):
        """
        Compose the node described by the upcoming event(s).

        An alias event yields the previously anchored node; an anchor that
        was already used in this document raises ComposerError.
        """
        if self.check_event(AliasEvent):
            alias_event = self.get_event()
            alias = alias_event.anchor
            if alias not in self.anchors:
                raise ComposerError(None, None, "found undefined alias %r"
                        % alias, alias_event.start_mark)
            return self.anchors[alias]

        upcoming = self.peek_event()
        anchor = upcoming.anchor
        if anchor is not None and anchor in self.anchors:
            raise ComposerError("found duplicate anchor %r; first occurrence"
                    % anchor, self.anchors[anchor].start_mark,
                    "second occurrence", upcoming.start_mark)

        self.descend_resolver(parent, index)
        if self.check_event(ScalarEvent):
            node = self.compose_scalar_node(anchor)
        elif self.check_event(SequenceStartEvent):
            node = self.compose_sequence_node(anchor)
        elif self.check_event(MappingStartEvent):
            node = self.compose_mapping_node(anchor)
        self.ascend_resolver()
        return node

    def compose_scalar_node(self, anchor):
        """Consume a scalar event and return the matching ScalarNode."""
        scalar = self.get_event()
        tag = scalar.tag
        # A missing or non-specific ('!') tag is resolved from the value.
        if tag is None or tag == '!':
            tag = self.resolve(ScalarNode, scalar.value, scalar.implicit)
        node = ScalarNode(tag, scalar.value,
                scalar.start_mark, scalar.end_mark, style=scalar.style)
        if anchor is not None:
            self.anchors[anchor] = node
        return node

    def compose_sequence_node(self, anchor):
        """Consume a whole sequence and return the matching SequenceNode."""
        opening = self.get_event()
        tag = opening.tag
        if tag is None or tag == '!':
            tag = self.resolve(SequenceNode, None, opening.implicit)
        node = SequenceNode(tag, [],
                opening.start_mark, None,
                flow_style=opening.flow_style)
        # The anchor is registered before the children are composed, so that
        # aliases inside the sequence can refer back to it.
        if anchor is not None:
            self.anchors[anchor] = node
        position = 0
        while not self.check_event(SequenceEndEvent):
            child = self.compose_node(node, position)
            node.value.append(child)
            position += 1
        closing = self.get_event()
        node.end_mark = closing.end_mark
        return node

    def compose_mapping_node(self, anchor):
        """
        Consume a whole mapping and return the matching MappingNode.

        Items are stored as a list of (key node, value node) pairs; duplicate
        keys are not detected here.
        """
        opening = self.get_event()
        tag = opening.tag
        if tag is None or tag == '!':
            tag = self.resolve(MappingNode, None, opening.implicit)
        node = MappingNode(tag, [],
                opening.start_mark, None,
                flow_style=opening.flow_style)
        # The anchor is registered before the items are composed, so that
        # aliases inside the mapping can refer back to it.
        if anchor is not None:
            self.anchors[anchor] = node
        while not self.check_event(MappingEndEvent):
            key_node = self.compose_node(node, None)
            value_node = self.compose_node(node, key_node)
            node.value.append((key_node, value_node))
        closing = self.get_event()
        node.end_mark = closing.end_mark
        return node
================================================
FILE: metaflow/_vendor/yaml/constructor.py
================================================
__all__ = [
'BaseConstructor',
'SafeConstructor',
'FullConstructor',
'UnsafeConstructor',
'Constructor',
'ConstructorError'
]
from .error import *
from .nodes import *
import collections.abc, datetime, base64, binascii, re, sys, types
class ConstructorError(MarkedYAMLError):
    """Error raised by the constructor classes while building Python objects."""
class BaseConstructor:
    """
    Turns representation nodes into Python objects.

    Constructors are looked up by exact tag in ``yaml_constructors`` and by
    tag prefix in ``yaml_multi_constructors``. ``check_node``, ``get_node``
    and ``get_single_node`` are called on ``self`` but are not defined here,
    so they are presumably provided by the classes this one is combined
    with.
    """

    # Class-level registries; add_constructor/add_multi_constructor copy them
    # on first write so that a subclass never mutates its parent's table.
    yaml_constructors = {}
    yaml_multi_constructors = {}

    def __init__(self):
        # node -> already constructed object (cleared after every document).
        self.constructed_objects = {}
        # Nodes currently under construction, used to detect recursion.
        self.recursive_objects = {}
        # Generators of two-step constructors that still have to be finished.
        self.state_generators = []
        self.deep_construct = False

    def check_data(self):
        """Return True if at least one more document is available."""
        return self.check_node()

    def check_state_key(self, key):
        """Block special attributes/methods from being set in a newly created
        object, to prevent user-controlled methods from being called during
        deserialization"""
        pattern = self.get_state_keys_blacklist_regexp()
        if pattern.match(key):
            raise ConstructorError(None, None,
                    "blacklisted key '%s' in instance state found" % (key,), None)

    def get_data(self):
        """Construct and return the next document (None if there is none)."""
        if not self.check_node():
            return None
        return self.construct_document(self.get_node())

    def get_single_data(self):
        """Construct the only document of the stream (None if it is empty)."""
        root = self.get_single_node()
        if root is None:
            return None
        return self.construct_document(root)

    def construct_document(self, node):
        """Construct the object for a document root and reset per-document state."""
        data = self.construct_object(node)
        # Finishing one generator may queue new ones, hence the outer loop.
        while self.state_generators:
            pending = self.state_generators
            self.state_generators = []
            for generator in pending:
                for _ in generator:
                    pass
        self.constructed_objects = {}
        self.recursive_objects = {}
        self.deep_construct = False
        return data

    def construct_object(self, node, deep=False):
        """
        Construct (or fetch from the cache) the Python object for ``node``.

        With ``deep`` true, generator-based constructors reached from here
        are run to completion immediately instead of being deferred to
        construct_document.
        """
        if node in self.constructed_objects:
            return self.constructed_objects[node]
        if deep:
            previous_deep = self.deep_construct
            self.deep_construct = True
        if node in self.recursive_objects:
            raise ConstructorError(None, None,
                    "found unconstructable recursive node", node.start_mark)
        self.recursive_objects[node] = None

        handler = None
        suffix = None
        tag = node.tag
        if tag in self.yaml_constructors:
            handler = self.yaml_constructors[tag]
        else:
            for prefix, multi_handler in self.yaml_multi_constructors.items():
                if prefix is not None and tag.startswith(prefix):
                    suffix = tag[len(prefix):]
                    handler = multi_handler
                    break
            else:
                # No prefix matched: fall back to the catch-all entries, then
                # to the plain per-node-kind constructors.
                if None in self.yaml_multi_constructors:
                    suffix = tag
                    handler = self.yaml_multi_constructors[None]
                elif None in self.yaml_constructors:
                    handler = self.yaml_constructors[None]
                elif isinstance(node, ScalarNode):
                    handler = self.__class__.construct_scalar
                elif isinstance(node, SequenceNode):
                    handler = self.__class__.construct_sequence
                elif isinstance(node, MappingNode):
                    handler = self.__class__.construct_mapping

        if suffix is None:
            data = handler(self, node)
        else:
            data = handler(self, suffix, node)

        if isinstance(data, types.GeneratorType):
            # Two-step constructor: the first yield is the (still empty)
            # object, the rest of the generator fills it in.
            generator = data
            data = next(generator)
            if self.deep_construct:
                for _ in generator:
                    pass
            else:
                self.state_generators.append(generator)

        self.constructed_objects[node] = data
        del self.recursive_objects[node]
        if deep:
            self.deep_construct = previous_deep
        return data

    def construct_scalar(self, node):
        """Return the raw value of a scalar node."""
        if isinstance(node, ScalarNode):
            return node.value
        raise ConstructorError(None, None,
                "expected a scalar node, but found %s" % node.id,
                node.start_mark)

    def construct_sequence(self, node, deep=False):
        """Return a list with the constructed children of a sequence node."""
        if not isinstance(node, SequenceNode):
            raise ConstructorError(None, None,
                    "expected a sequence node, but found %s" % node.id,
                    node.start_mark)
        items = []
        for child in node.value:
            items.append(self.construct_object(child, deep=deep))
        return items

    def construct_mapping(self, node, deep=False):
        """Return a dict built from a mapping node; keys must be hashable."""
        if not isinstance(node, MappingNode):
            raise ConstructorError(None, None,
                    "expected a mapping node, but found %s" % node.id,
                    node.start_mark)
        result = {}
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            if not isinstance(key, collections.abc.Hashable):
                raise ConstructorError("while constructing a mapping", node.start_mark,
                        "found unhashable key", key_node.start_mark)
            result[key] = self.construct_object(value_node, deep=deep)
        return result

    def construct_pairs(self, node, deep=False):
        """Return a list of (key, value) tuples built from a mapping node."""
        if not isinstance(node, MappingNode):
            raise ConstructorError(None, None,
                    "expected a mapping node, but found %s" % node.id,
                    node.start_mark)
        result = []
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            value = self.construct_object(value_node, deep=deep)
            result.append((key, value))
        return result

    @classmethod
    def add_constructor(cls, tag, constructor):
        """Register ``constructor`` for ``tag`` on this class only."""
        # Copy the inherited table on first write so parents stay untouched.
        if 'yaml_constructors' not in cls.__dict__:
            cls.yaml_constructors = cls.yaml_constructors.copy()
        cls.yaml_constructors[tag] = constructor

    @classmethod
    def add_multi_constructor(cls, tag_prefix, multi_constructor):
        """Register ``multi_constructor`` for ``tag_prefix`` on this class only."""
        # Copy the inherited table on first write so parents stay untouched.
        if 'yaml_multi_constructors' not in cls.__dict__:
            cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy()
        cls.yaml_multi_constructors[tag_prefix] = multi_constructor
class SafeConstructor(BaseConstructor):
    """
    Constructor for the standard YAML tags (null, bool, int, float, binary,
    timestamp, omap, pairs, set, str, seq, map).
    """

    def construct_scalar(self, node):
        """
        Return the scalar value of ``node``.

        A mapping node that contains a ``tag:yaml.org,2002:value`` key is
        treated as a scalar holding that key's value.
        """
        if isinstance(node, MappingNode):
            for key_node, value_node in node.value:
                if key_node.tag == 'tag:yaml.org,2002:value':
                    return self.construct_scalar(value_node)
        return super().construct_scalar(node)

    def flatten_mapping(self, node):
        """
        Expand merge keys of a mapping node in place.

        Merged items are put in front of the node's own items, so that the
        node's own keys win when the dict is built afterwards.
        """
        merge = []
        index = 0
        while index < len(node.value):
            key_node, value_node = node.value[index]
            if key_node.tag == 'tag:yaml.org,2002:merge':
                # The merge entry itself is removed; index is not advanced
                # because the next item has moved into this position.
                del node.value[index]
                if isinstance(value_node, MappingNode):
                    self.flatten_mapping(value_node)
                    merge.extend(value_node.value)
                elif isinstance(value_node, SequenceNode):
                    submerge = []
                    for subnode in value_node.value:
                        if not isinstance(subnode, MappingNode):
                            raise ConstructorError("while constructing a mapping",
                                    node.start_mark,
                                    "expected a mapping for merging, but found %s"
                                    % subnode.id, subnode.start_mark)
                        self.flatten_mapping(subnode)
                        submerge.append(subnode.value)
                    # Reversed so that earlier mappings in the list override
                    # later ones once the dict is built.
                    submerge.reverse()
                    for value in submerge:
                        merge.extend(value)
                else:
                    raise ConstructorError("while constructing a mapping", node.start_mark,
                            "expected a mapping or list of mappings for merging, but found %s"
                            % value_node.id, value_node.start_mark)
            elif key_node.tag == 'tag:yaml.org,2002:value':
                # A value key inside a real mapping is just a string key.
                key_node.tag = 'tag:yaml.org,2002:str'
                index += 1
            else:
                index += 1
        if merge:
            node.value = merge + node.value

    def construct_mapping(self, node, deep=False):
        """Build a dict from a mapping node after expanding its merge keys."""
        if isinstance(node, MappingNode):
            self.flatten_mapping(node)
        return super().construct_mapping(node, deep=deep)

    def construct_yaml_null(self, node):
        """Return None; the node is still checked to be a scalar."""
        self.construct_scalar(node)
        return None

    # Lower-cased spellings accepted for booleans.
    bool_values = {
        'yes':      True,
        'no':       False,
        'true':     True,
        'false':    False,
        'on':       True,
        'off':      False,
    }

    def construct_yaml_bool(self, node):
        """Return the bool for a scalar, looked up case-insensitively."""
        value = self.construct_scalar(node)
        return self.bool_values[value.lower()]

    def construct_yaml_int(self, node):
        """
        Parse an integer scalar.

        Handles '_' separators, an optional sign, the '0b' and '0x'
        prefixes, a leading '0' (octal) and ':'-separated base 60 digits.
        """
        value = self.construct_scalar(node)
        value = value.replace('_', '')
        sign = +1
        if value[0] == '-':
            sign = -1
        if value[0] in '+-':
            value = value[1:]
        if value == '0':
            return 0
        elif value.startswith('0b'):
            return sign*int(value[2:], 2)
        elif value.startswith('0x'):
            return sign*int(value[2:], 16)
        elif value[0] == '0':
            return sign*int(value, 8)
        elif ':' in value:
            digits = [int(part) for part in value.split(':')]
            digits.reverse()
            base = 1
            value = 0
            for digit in digits:
                value += digit*base
                base *= 60
            return sign*value
        else:
            return sign*int(value)

    # Square a large float until it stops changing, i.e. reaches infinity.
    inf_value = 1e300
    while inf_value != inf_value*inf_value:
        inf_value *= inf_value
    nan_value = -inf_value/inf_value   # Trying to make a quiet NaN (like C99).

    def construct_yaml_float(self, node):
        """
        Parse a float scalar.

        Handles '_' separators, an optional sign, '.inf', '.nan' and
        ':'-separated base 60 digits.
        """
        value = self.construct_scalar(node)
        value = value.replace('_', '').lower()
        sign = +1
        if value[0] == '-':
            sign = -1
        if value[0] in '+-':
            value = value[1:]
        if value == '.inf':
            return sign*self.inf_value
        elif value == '.nan':
            return self.nan_value
        elif ':' in value:
            digits = [float(part) for part in value.split(':')]
            digits.reverse()
            base = 1
            value = 0.0
            for digit in digits:
                value += digit*base
                base *= 60
            return sign*value
        else:
            return sign*float(value)

    def construct_yaml_binary(self, node):
        """Decode a base64 scalar into bytes."""
        try:
            value = self.construct_scalar(node).encode('ascii')
        except UnicodeEncodeError as exc:
            raise ConstructorError(None, None,
                    "failed to convert base64 data into ascii: %s" % exc,
                    node.start_mark)
        try:
            if hasattr(base64, 'decodebytes'):
                return base64.decodebytes(value)
            else:
                return base64.decodestring(value)
        except binascii.Error as exc:
            raise ConstructorError(None, None,
                    "failed to decode base64 data: %s" % exc, node.start_mark)

    # The group names below are the keys construct_yaml_timestamp reads from
    # match.groupdict(); each group needs the (?P<name>...) form.
    timestamp_regexp = re.compile(
            r'''^(?P<year>[0-9][0-9][0-9][0-9])
                -(?P<month>[0-9][0-9]?)
                -(?P<day>[0-9][0-9]?)
                (?:(?:[Tt]|[ \t]+)
                (?P<hour>[0-9][0-9]?)
                :(?P<minute>[0-9][0-9])
                :(?P<second>[0-9][0-9])
                (?:\.(?P<fraction>[0-9]*))?
                (?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?)
                (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X)

    def construct_yaml_timestamp(self, node):
        """
        Parse a timestamp scalar.

        Returns a ``datetime.date`` when no time part is present, otherwise
        a ``datetime.datetime`` that is timezone-aware if a zone was given.
        """
        # Called for its check that the node is a scalar.
        self.construct_scalar(node)
        match = self.timestamp_regexp.match(node.value)
        values = match.groupdict()
        year = int(values['year'])
        month = int(values['month'])
        day = int(values['day'])
        if not values['hour']:
            return datetime.date(year, month, day)
        hour = int(values['hour'])
        minute = int(values['minute'])
        second = int(values['second'])
        fraction = 0
        tzinfo = None
        if values['fraction']:
            # Truncate / right-pad the fraction to exactly six digits so it
            # can be passed as microseconds.
            fraction = values['fraction'][:6]
            while len(fraction) < 6:
                fraction += '0'
            fraction = int(fraction)
        if values['tz_sign']:
            tz_hour = int(values['tz_hour'])
            tz_minute = int(values['tz_minute'] or 0)
            delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute)
            if values['tz_sign'] == '-':
                delta = -delta
            tzinfo = datetime.timezone(delta)
        elif values['tz']:
            # A zone without a sign can only be the literal 'Z'.
            tzinfo = datetime.timezone.utc
        return datetime.datetime(year, month, day, hour, minute, second, fraction,
                                 tzinfo=tzinfo)

    def construct_yaml_omap(self, node):
        """
        Build a list of (key, value) tuples from a sequence of one-item
        mappings. The empty list is yielded first and filled in afterwards.
        """
        # Note: we do not check for duplicate keys, because it's too
        # CPU-expensive.
        omap = []
        yield omap
        if not isinstance(node, SequenceNode):
            raise ConstructorError("while constructing an ordered map", node.start_mark,
                    "expected a sequence, but found %s" % node.id, node.start_mark)
        for subnode in node.value:
            if not isinstance(subnode, MappingNode):
                raise ConstructorError("while constructing an ordered map", node.start_mark,
                        "expected a mapping of length 1, but found %s" % subnode.id,
                        subnode.start_mark)
            if len(subnode.value) != 1:
                raise ConstructorError("while constructing an ordered map", node.start_mark,
                        "expected a single mapping item, but found %d items" % len(subnode.value),
                        subnode.start_mark)
            key_node, value_node = subnode.value[0]
            key = self.construct_object(key_node)
            value = self.construct_object(value_node)
            omap.append((key, value))

    def construct_yaml_pairs(self, node):
        """
        Build a list of (key, value) tuples from a sequence of one-item
        mappings. The empty list is yielded first and filled in afterwards.
        """
        # Note: the same code as `construct_yaml_omap`.
        pairs = []
        yield pairs
        if not isinstance(node, SequenceNode):
            raise ConstructorError("while constructing pairs", node.start_mark,
                    "expected a sequence, but found %s" % node.id, node.start_mark)
        for subnode in node.value:
            if not isinstance(subnode, MappingNode):
                raise ConstructorError("while constructing pairs", node.start_mark,
                        "expected a mapping of length 1, but found %s" % subnode.id,
                        subnode.start_mark)
            if len(subnode.value) != 1:
                raise ConstructorError("while constructing pairs", node.start_mark,
                        "expected a single mapping item, but found %d items" % len(subnode.value),
                        subnode.start_mark)
            key_node, value_node = subnode.value[0]
            key = self.construct_object(key_node)
            value = self.construct_object(value_node)
            pairs.append((key, value))

    def construct_yaml_set(self, node):
        """Yield an empty set, then fill it with the keys of the mapping."""
        data = set()
        yield data
        value = self.construct_mapping(node)
        data.update(value)

    def construct_yaml_str(self, node):
        """Return the scalar value unchanged."""
        return self.construct_scalar(node)

    def construct_yaml_seq(self, node):
        """Yield an empty list, then fill it with the sequence items."""
        data = []
        yield data
        data.extend(self.construct_sequence(node))

    def construct_yaml_map(self, node):
        """Yield an empty dict, then fill it with the mapping items."""
        data = {}
        yield data
        value = self.construct_mapping(node)
        data.update(value)

    def construct_yaml_object(self, node, cls):
        """
        Yield a bare ``cls`` instance (created without ``__init__``), then
        restore its state from the mapping via ``__setstate__`` if present,
        or by updating ``__dict__`` otherwise.
        """
        data = cls.__new__(cls)
        yield data
        if hasattr(data, '__setstate__'):
            state = self.construct_mapping(node, deep=True)
            data.__setstate__(state)
        else:
            state = self.construct_mapping(node)
            data.__dict__.update(state)

    def construct_undefined(self, node):
        """Raise ConstructorError for a tag that has no constructor."""
        raise ConstructorError(None, None,
                "could not determine a constructor for the tag %r" % node.tag,
                node.start_mark)
# Register the constructors for the standard YAML tags; the final None entry
# is the fallback used for any tag without a constructor of its own.
for _tag, _handler in (
        ('tag:yaml.org,2002:null', SafeConstructor.construct_yaml_null),
        ('tag:yaml.org,2002:bool', SafeConstructor.construct_yaml_bool),
        ('tag:yaml.org,2002:int', SafeConstructor.construct_yaml_int),
        ('tag:yaml.org,2002:float', SafeConstructor.construct_yaml_float),
        ('tag:yaml.org,2002:binary', SafeConstructor.construct_yaml_binary),
        ('tag:yaml.org,2002:timestamp', SafeConstructor.construct_yaml_timestamp),
        ('tag:yaml.org,2002:omap', SafeConstructor.construct_yaml_omap),
        ('tag:yaml.org,2002:pairs', SafeConstructor.construct_yaml_pairs),
        ('tag:yaml.org,2002:set', SafeConstructor.construct_yaml_set),
        ('tag:yaml.org,2002:str', SafeConstructor.construct_yaml_str),
        ('tag:yaml.org,2002:seq', SafeConstructor.construct_yaml_seq),
        ('tag:yaml.org,2002:map', SafeConstructor.construct_yaml_map),
        (None, SafeConstructor.construct_undefined),
):
    SafeConstructor.add_constructor(_tag, _handler)
del _tag, _handler
class FullConstructor(SafeConstructor):
    """
    Constructor that adds the ``python/*`` tags to SafeConstructor.

    Modules are only looked up among the already imported ones, instances
    are only created from real classes, and state keys are checked against
    a blacklist. Each of these checks is skipped when ``unsafe=True`` is
    passed to the helper methods.
    """

    def get_state_keys_blacklist(self):
        """Return the regex fragments of state keys that must not be set."""
        # 'extend' is blacklisted because it is used by
        # construct_python_object_apply to add `listitems` to a newly generate
        # python instance
        return ['^extend$', '^__.*__$']

    def get_state_keys_blacklist_regexp(self):
        """Return the compiled blacklist pattern, cached on the instance."""
        if not hasattr(self, 'state_keys_blacklist_regexp'):
            alternatives = '|'.join(self.get_state_keys_blacklist())
            self.state_keys_blacklist_regexp = re.compile('(' + alternatives + ')')
        return self.state_keys_blacklist_regexp

    def construct_python_str(self, node):
        """Return the scalar value unchanged."""
        return self.construct_scalar(node)

    def construct_python_unicode(self, node):
        """Return the scalar value unchanged."""
        return self.construct_scalar(node)

    def construct_python_bytes(self, node):
        """Decode a base64 scalar into bytes."""
        try:
            encoded = self.construct_scalar(node).encode('ascii')
        except UnicodeEncodeError as exc:
            raise ConstructorError(None, None,
                    "failed to convert base64 data into ascii: %s" % exc,
                    node.start_mark)
        try:
            if hasattr(base64, 'decodebytes'):
                return base64.decodebytes(encoded)
            else:
                return base64.decodestring(encoded)
        except binascii.Error as exc:
            raise ConstructorError(None, None,
                    "failed to decode base64 data: %s" % exc, node.start_mark)

    def construct_python_long(self, node):
        """Parse the scalar exactly like a YAML int."""
        return self.construct_yaml_int(node)

    def construct_python_complex(self, node):
        """Parse the scalar with ``complex()``."""
        text = self.construct_scalar(node)
        return complex(text)

    def construct_python_tuple(self, node):
        """Return the sequence items as a tuple."""
        items = self.construct_sequence(node)
        return tuple(items)

    def find_python_module(self, name, mark, unsafe=False):
        """
        Return the module called ``name`` from ``sys.modules``.

        The module is imported first only when ``unsafe`` is true; otherwise
        a module that is not already imported raises ConstructorError.
        """
        if not name:
            raise ConstructorError("while constructing a Python module", mark,
                    "expected non-empty name appended to the tag", mark)
        if unsafe:
            try:
                __import__(name)
            except ImportError as exc:
                raise ConstructorError("while constructing a Python module", mark,
                        "cannot find module %r (%s)" % (name, exc), mark)
        if name not in sys.modules:
            raise ConstructorError("while constructing a Python module", mark,
                    "module %r is not imported" % name, mark)
        return sys.modules[name]

    def find_python_name(self, name, mark, unsafe=False):
        """
        Return the object a dotted ``name`` refers to.

        A name without a dot is looked up in ``builtins``. The module is
        imported first only when ``unsafe`` is true.
        """
        if not name:
            raise ConstructorError("while constructing a Python object", mark,
                    "expected non-empty name appended to the tag", mark)
        if '.' in name:
            module_name, object_name = name.rsplit('.', 1)
        else:
            module_name, object_name = 'builtins', name
        if unsafe:
            try:
                __import__(module_name)
            except ImportError as exc:
                raise ConstructorError("while constructing a Python object", mark,
                        "cannot find module %r (%s)" % (module_name, exc), mark)
        if module_name not in sys.modules:
            raise ConstructorError("while constructing a Python object", mark,
                    "module %r is not imported" % module_name, mark)
        module = sys.modules[module_name]
        if not hasattr(module, object_name):
            raise ConstructorError("while constructing a Python object", mark,
                    "cannot find %r in the module %r"
                    % (object_name, module.__name__), mark)
        return getattr(module, object_name)

    def construct_python_name(self, suffix, node):
        """Resolve the tag suffix to a Python object; the node must be empty."""
        content = self.construct_scalar(node)
        if content:
            raise ConstructorError("while constructing a Python name", node.start_mark,
                    "expected the empty value, but found %r" % content, node.start_mark)
        return self.find_python_name(suffix, node.start_mark)

    def construct_python_module(self, suffix, node):
        """Resolve the tag suffix to a module; the node must be empty."""
        content = self.construct_scalar(node)
        if content:
            raise ConstructorError("while constructing a Python module", node.start_mark,
                    "expected the empty value, but found %r" % content, node.start_mark)
        return self.find_python_module(suffix, node.start_mark)

    def make_python_instance(self, suffix, node,
            args=None, kwds=None, newobj=False, unsafe=False):
        """
        Create an instance of the object named by ``suffix``.

        With ``newobj`` true and a real class, ``__new__`` is used instead of
        a normal call. Unless ``unsafe`` is true, anything that is not a
        class is rejected.
        """
        args = args or []
        kwds = kwds or {}
        cls = self.find_python_name(suffix, node.start_mark)
        is_class = isinstance(cls, type)
        if not unsafe and not is_class:
            raise ConstructorError("while constructing a Python instance", node.start_mark,
                    "expected a class, but found %r" % type(cls),
                    node.start_mark)
        if newobj and is_class:
            return cls.__new__(cls, *args, **kwds)
        return cls(*args, **kwds)

    def set_python_instance_state(self, instance, state, unsafe=False):
        """
        Apply ``state`` to ``instance``.

        ``__setstate__`` is used when available. Otherwise ``state`` may be a
        ``(dict_state, slot_state)`` tuple; dict state goes into
        ``__dict__`` and slot state is applied with ``setattr``. Keys are
        checked against the blacklist unless ``unsafe`` is true.
        """
        if hasattr(instance, '__setstate__'):
            instance.__setstate__(state)
            return
        slotstate = {}
        if isinstance(state, tuple) and len(state) == 2:
            state, slotstate = state
        if hasattr(instance, '__dict__'):
            if not unsafe and state:
                for key in state.keys():
                    self.check_state_key(key)
            instance.__dict__.update(state)
        elif state:
            # No __dict__: treat the plain state as slot state.
            slotstate.update(state)
        for key, value in slotstate.items():
            if not unsafe:
                self.check_state_key(key)
            setattr(instance, key, value)

    def construct_python_object(self, suffix, node):
        """
        Yield a new instance, then restore its state from the mapping.
        """
        # Format:
        #   !!python/object:module.name { ... state ... }
        instance = self.make_python_instance(suffix, node, newobj=True)
        yield instance
        needs_deep = hasattr(instance, '__setstate__')
        state = self.construct_mapping(node, deep=needs_deep)
        self.set_python_instance_state(instance, state)

    def construct_python_object_apply(self, suffix, node, newobj=False):
        """
        Create an instance from call arguments and optional state and items.
        """
        # Format:
        #   !!python/object/apply       # (or !!python/object/new)
        #   args: [ ... arguments ... ]
        #   kwds: { ... keywords ... }
        #   state: ... state ...
        #   listitems: [ ... listitems ... ]
        #   dictitems: { ... dictitems ... }
        # or short format:
        #   !!python/object/apply [ ... arguments ... ]
        # The difference between !!python/object/apply and !!python/object/new
        # is how an object is created, check make_python_instance for details.
        if isinstance(node, SequenceNode):
            args = self.construct_sequence(node, deep=True)
            kwds = {}
            state = {}
            listitems = []
            dictitems = {}
        else:
            spec = self.construct_mapping(node, deep=True)
            args = spec.get('args', [])
            kwds = spec.get('kwds', {})
            state = spec.get('state', {})
            listitems = spec.get('listitems', [])
            dictitems = spec.get('dictitems', {})
        instance = self.make_python_instance(suffix, node, args, kwds, newobj)
        if state:
            self.set_python_instance_state(instance, state)
        if listitems:
            instance.extend(listitems)
        if dictitems:
            for key in dictitems:
                instance[key] = dictitems[key]
        return instance

    def construct_python_object_new(self, suffix, node):
        """Same as construct_python_object_apply, but with ``newobj=True``."""
        return self.construct_python_object_apply(suffix, node, newobj=True)
# Register the constructors for the exact python/* tags.
for _tag, _handler in (
        ('tag:yaml.org,2002:python/none', FullConstructor.construct_yaml_null),
        ('tag:yaml.org,2002:python/bool', FullConstructor.construct_yaml_bool),
        ('tag:yaml.org,2002:python/str', FullConstructor.construct_python_str),
        ('tag:yaml.org,2002:python/unicode', FullConstructor.construct_python_unicode),
        ('tag:yaml.org,2002:python/bytes', FullConstructor.construct_python_bytes),
        ('tag:yaml.org,2002:python/int', FullConstructor.construct_yaml_int),
        ('tag:yaml.org,2002:python/long', FullConstructor.construct_python_long),
        ('tag:yaml.org,2002:python/float', FullConstructor.construct_yaml_float),
        ('tag:yaml.org,2002:python/complex', FullConstructor.construct_python_complex),
        ('tag:yaml.org,2002:python/list', FullConstructor.construct_yaml_seq),
        ('tag:yaml.org,2002:python/tuple', FullConstructor.construct_python_tuple),
        ('tag:yaml.org,2002:python/dict', FullConstructor.construct_yaml_map),
):
    FullConstructor.add_constructor(_tag, _handler)

# Register the prefix-based constructors; the part of the tag after the
# prefix is passed to the handler as its suffix argument.
for _tag, _handler in (
        ('tag:yaml.org,2002:python/name:', FullConstructor.construct_python_name),
        ('tag:yaml.org,2002:python/module:', FullConstructor.construct_python_module),
        ('tag:yaml.org,2002:python/object:', FullConstructor.construct_python_object),
        ('tag:yaml.org,2002:python/object/new:', FullConstructor.construct_python_object_new),
):
    FullConstructor.add_multi_constructor(_tag, _handler)
del _tag, _handler
class UnsafeConstructor(FullConstructor):
    """
    FullConstructor with every helper forced into ``unsafe=True`` mode.
    """

    def find_python_module(self, name, mark):
        """Look up a module, importing it if necessary."""
        return super().find_python_module(name, mark, unsafe=True)

    def find_python_name(self, name, mark):
        """Look up a dotted name, importing its module if necessary."""
        return super().find_python_name(name, mark, unsafe=True)

    def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False):
        """Create an instance without requiring the target to be a class."""
        return super().make_python_instance(
            suffix, node, args, kwds, newobj, unsafe=True)

    def set_python_instance_state(self, instance, state):
        """Apply state without checking keys against the blacklist."""
        return super().set_python_instance_state(
            instance, state, unsafe=True)
# The python/object/apply: prefix is registered on UnsafeConstructor only;
# the FullConstructor registrations do not include it.
UnsafeConstructor.add_multi_constructor(
'tag:yaml.org,2002:python/object/apply:',
UnsafeConstructor.construct_python_object_apply)
# Constructor is same as UnsafeConstructor. Need to leave this in place in case
# people have extended it directly.
class Constructor(UnsafeConstructor):
    """Alias of UnsafeConstructor, kept for code that subclasses it."""
================================================
FILE: metaflow/_vendor/yaml/cyaml.py
================================================
__all__ = [
'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader',
'CBaseDumper', 'CSafeDumper', 'CDumper'
]
from _yaml import CParser, CEmitter
from .constructor import *
from .serializer import *
from .representer import *
from .resolver import *
class CBaseLoader(CParser, BaseConstructor, BaseResolver):
    """Loader combining CParser (from ``_yaml``) with BaseConstructor and
    BaseResolver."""

    def __init__(self, stream):
        # Only the parser consumes the stream; the other bases take no
        # arguments and are initialised in declaration order.
        CParser.__init__(self, stream)
        for component in (BaseConstructor, BaseResolver):
            component.__init__(self)
class CSafeLoader(CParser, SafeConstructor, Resolver):
    """Loader combining CParser (from ``_yaml``) with SafeConstructor and
    Resolver."""

    def __init__(self, stream):
        # Only the parser consumes the stream; the other bases take no
        # arguments and are initialised in declaration order.
        CParser.__init__(self, stream)
        for component in (SafeConstructor, Resolver):
            component.__init__(self)
class CFullLoader(CParser, FullConstructor, Resolver):
    """Loader combining CParser (from ``_yaml``) with FullConstructor and
    Resolver."""

    def __init__(self, stream):
        # Only the parser consumes the stream; the other bases take no
        # arguments and are initialised in declaration order.
        CParser.__init__(self, stream)
        for component in (FullConstructor, Resolver):
            component.__init__(self)
class CUnsafeLoader(CParser, UnsafeConstructor, Resolver):
    """Loader combining CParser (from ``_yaml``) with UnsafeConstructor and
    Resolver."""

    def __init__(self, stream):
        # Only the parser consumes the stream; the other bases take no
        # arguments and are initialised in declaration order.
        CParser.__init__(self, stream)
        for component in (UnsafeConstructor, Resolver):
            component.__init__(self)
class CLoader(CParser, Constructor, Resolver):
    """Loader combining CParser (from ``_yaml``) with Constructor and
    Resolver."""

    def __init__(self, stream):
        # Only the parser consumes the stream; the other bases take no
        # arguments and are initialised in declaration order.
        CParser.__init__(self, stream)
        for component in (Constructor, Resolver):
            component.__init__(self)
class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):
    """Dumper combining CEmitter (from ``_yaml``) with BaseRepresenter and
    BaseResolver.

    The emitter receives the stream plus all formatting and document options
    (canonical, indent, width, encoding, allow_unicode, line_break,
    explicit_start, explicit_end, version, tags); the representer receives
    default_style, default_flow_style and sort_keys.
    """

    def __init__(self, stream,
            default_style=None, default_flow_style=False,
            canonical=None, indent=None, width=None,
            allow_unicode=None, line_break=None,
            encoding=None, explicit_start=None, explicit_end=None,
            version=None, tags=None, sort_keys=True):
        CEmitter.__init__(self, stream, canonical=canonical,
                indent=indent, width=width, encoding=encoding,
                allow_unicode=allow_unicode, line_break=line_break,
                explicit_start=explicit_start, explicit_end=explicit_end,
                version=version, tags=tags)
        # Initialise the bases this class actually derives from, rather than
        # the Representer/Resolver subclasses it is not an instance of; this
        # matches how the sibling dumpers initialise their own bases.
        BaseRepresenter.__init__(self, default_style=default_style,
                default_flow_style=default_flow_style, sort_keys=sort_keys)
        BaseResolver.__init__(self)
class CSafeDumper(CEmitter, SafeRepresenter, Resolver):
    """Dumper combining CEmitter (from ``_yaml``) with SafeRepresenter and
    Resolver."""

    def __init__(self, stream,
            default_style=None, default_flow_style=False,
            canonical=None, indent=None, width=None,
            allow_unicode=None, line_break=None,
            encoding=None, explicit_start=None, explicit_end=None,
            version=None, tags=None, sort_keys=True):
        # Options consumed by the emitter.
        emitter_options = dict(
            canonical=canonical, indent=indent, width=width,
            encoding=encoding, allow_unicode=allow_unicode,
            line_break=line_break, explicit_start=explicit_start,
            explicit_end=explicit_end, version=version, tags=tags)
        # Options consumed by the representer.
        representer_options = dict(
            default_style=default_style,
            default_flow_style=default_flow_style, sort_keys=sort_keys)
        CEmitter.__init__(self, stream, **emitter_options)
        SafeRepresenter.__init__(self, **representer_options)
        Resolver.__init__(self)
class CDumper(CEmitter, Serializer, Representer, Resolver):
    """Dumper combining CEmitter (from ``_yaml``) with Serializer,
    Representer and Resolver.

    Only the emitter, representer and resolver initialisers are invoked here;
    Serializer.__init__ is not called.
    """

    def __init__(self, stream,
            default_style=None, default_flow_style=False,
            canonical=None, indent=None, width=None,
            allow_unicode=None, line_break=None,
            encoding=None, explicit_start=None, explicit_end=None,
            version=None, tags=None, sort_keys=True):
        # Options consumed by the emitter.
        emitter_options = dict(
            canonical=canonical, indent=indent, width=width,
            encoding=encoding, allow_unicode=allow_unicode,
            line_break=line_break, explicit_start=explicit_start,
            explicit_end=explicit_end, version=version, tags=tags)
        # Options consumed by the representer.
        representer_options = dict(
            default_style=default_style,
            default_flow_style=default_flow_style, sort_keys=sort_keys)
        CEmitter.__init__(self, stream, **emitter_options)
        Representer.__init__(self, **representer_options)
        Resolver.__init__(self)
================================================
FILE: metaflow/_vendor/yaml/dumper.py
================================================
__all__ = ['BaseDumper', 'SafeDumper', 'Dumper']
from .emitter import *
from .serializer import *
from .representer import *
from .resolver import *
class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver):
    """Dumper combining Emitter, Serializer, BaseRepresenter and BaseResolver.

    Formatting options (canonical, indent, width, allow_unicode, line_break)
    go to the emitter; encoding, explicit_start, explicit_end, version and
    tags go to the serializer; default_style, default_flow_style and
    sort_keys go to the representer.
    """

    def __init__(self, stream,
            default_style=None, default_flow_style=False,
            canonical=None, indent=None, width=None,
            allow_unicode=None, line_break=None,
            encoding=None, explicit_start=None, explicit_end=None,
            version=None, tags=None, sort_keys=True):
        Emitter.__init__(self, stream, canonical=canonical,
                indent=indent, width=width,
                allow_unicode=allow_unicode, line_break=line_break)
        Serializer.__init__(self, encoding=encoding,
                explicit_start=explicit_start, explicit_end=explicit_end,
                version=version, tags=tags)
        # Initialise the bases this class actually derives from, rather than
        # the Representer/Resolver subclasses it is not an instance of; this
        # matches how the sibling dumpers initialise their own bases.
        BaseRepresenter.__init__(self, default_style=default_style,
                default_flow_style=default_flow_style, sort_keys=sort_keys)
        BaseResolver.__init__(self)
class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver):
    """Dumper combining Emitter, Serializer, SafeRepresenter and Resolver."""

    def __init__(self, stream,
            default_style=None, default_flow_style=False,
            canonical=None, indent=None, width=None,
            allow_unicode=None, line_break=None,
            encoding=None, explicit_start=None, explicit_end=None,
            version=None, tags=None, sort_keys=True):
        # Split the keyword arguments by the component that consumes them.
        emitter_options = dict(
            canonical=canonical, indent=indent, width=width,
            allow_unicode=allow_unicode, line_break=line_break)
        serializer_options = dict(
            encoding=encoding, explicit_start=explicit_start,
            explicit_end=explicit_end, version=version, tags=tags)
        representer_options = dict(
            default_style=default_style,
            default_flow_style=default_flow_style, sort_keys=sort_keys)
        Emitter.__init__(self, stream, **emitter_options)
        Serializer.__init__(self, **serializer_options)
        SafeRepresenter.__init__(self, **representer_options)
        Resolver.__init__(self)
class Dumper(Emitter, Serializer, Representer, Resolver):
    """Dumper combining Emitter, Serializer, Representer and Resolver."""

    def __init__(self, stream,
            default_style=None, default_flow_style=False,
            canonical=None, indent=None, width=None,
            allow_unicode=None, line_break=None,
            encoding=None, explicit_start=None, explicit_end=None,
            version=None, tags=None, sort_keys=True):
        # Split the keyword arguments by the component that consumes them.
        emitter_options = dict(
            canonical=canonical, indent=indent, width=width,
            allow_unicode=allow_unicode, line_break=line_break)
        serializer_options = dict(
            encoding=encoding, explicit_start=explicit_start,
            explicit_end=explicit_end, version=version, tags=tags)
        representer_options = dict(
            default_style=default_style,
            default_flow_style=default_flow_style, sort_keys=sort_keys)
        Emitter.__init__(self, stream, **emitter_options)
        Serializer.__init__(self, **serializer_options)
        Representer.__init__(self, **representer_options)
        Resolver.__init__(self)
================================================
FILE: metaflow/_vendor/yaml/emitter.py
================================================
# Emitter expects events obeying the following grammar:
# stream ::= STREAM-START document* STREAM-END
# document ::= DOCUMENT-START node DOCUMENT-END
# node ::= SCALAR | sequence | mapping
# sequence ::= SEQUENCE-START node* SEQUENCE-END
# mapping ::= MAPPING-START (node node)* MAPPING-END
__all__ = ['Emitter', 'EmitterError']
from .error import YAMLError
from .events import *
class EmitterError(YAMLError):
    """Raised by Emitter for unexpected events or invalid anchors, tags,
    tag handles, tag prefixes and versions."""
class ScalarAnalysis:
    """Record of what was learned about one scalar value.

    Holds the scalar itself, whether it is empty or multiline, and one
    ``allow_*`` flag per output style.
    """

    def __init__(self, scalar, empty, multiline,
            allow_flow_plain, allow_block_plain,
            allow_single_quoted, allow_double_quoted,
            allow_block):
        # Store every argument under an attribute of the same name.
        for field, value in (
                ('scalar', scalar),
                ('empty', empty),
                ('multiline', multiline),
                ('allow_flow_plain', allow_flow_plain),
                ('allow_block_plain', allow_block_plain),
                ('allow_single_quoted', allow_single_quoted),
                ('allow_double_quoted', allow_double_quoted),
                ('allow_block', allow_block)):
            setattr(self, field, value)
class Emitter:
# Tag prefix -> handle written in its place.  expect_document_start copies
# this mapping into self.tag_prefixes for every document before adding the
# document's own tag directives; prepare_tag consults it to shorten tags.
DEFAULT_TAG_PREFIXES = {
    '!' : '!',
    'tag:yaml.org,2002:' : '!!',
}
def __init__(self, stream, canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None):
# The stream should have the methods `write` and possibly `flush`.
self.stream = stream
# Encoding can be overridden by STREAM-START.
self.encoding = None
# Emitter is a state machine with a stack of states to handle nested
# structures.
self.states = []
self.state = self.expect_stream_start
# Current event and the event queue.
self.events = []
self.event = None
# The current indentation level and the stack of previous indents.
self.indents = []
self.indent = None
# Flow level.
self.flow_level = 0
# Contexts.
self.root_context = False
self.sequence_context = False
self.mapping_context = False
self.simple_key_context = False
# Characteristics of the last emitted character:
# - current position.
# - is it a whitespace?
# - is it an indention character
# (indentation space, '-', '?', or ':')?
self.line = 0
self.column = 0
self.whitespace = True
self.indention = True
# Whether the document requires an explicit document indicator
self.open_ended = False
# Formatting details.
self.canonical = canonical
self.allow_unicode = allow_unicode
self.best_indent = 2
if indent and 1 < indent < 10:
self.best_indent = indent
self.best_width = 80
if width and width > self.best_indent*2:
self.best_width = width
self.best_line_break = '\n'
if line_break in ['\r', '\n', '\r\n']:
self.best_line_break = line_break
# Tag prefixes.
self.tag_prefixes = None
# Prepared anchor and tag.
self.prepared_anchor = None
self.prepared_tag = None
# Scalar analysis and style.
self.analysis = None
self.style = None
def dispose(self):
# Reset the state attributes (to clear self-references)
self.states = []
self.state = None
def emit(self, event):
    """Queue `event`, then run the current state handler on queued events
    for as long as need_more_events() reports enough look-ahead."""
    self.events.append(event)
    while True:
        if self.need_more_events():
            return
        # The handler reads the event from self.event.
        self.event = self.events.pop(0)
        self.state()
        self.event = None
# In some cases, we wait for a few next events before emitting.
def need_more_events(self):
if not self.events:
return True
event = self.events[0]
if isinstance(event, DocumentStartEvent):
return self.need_events(1)
elif isinstance(event, SequenceStartEvent):
return self.need_events(2)
elif isinstance(event, MappingStartEvent):
return self.need_events(3)
else:
return False
def need_events(self, count):
level = 0
for event in self.events[1:]:
if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
level += 1
elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
level -= 1
elif isinstance(event, StreamEndEvent):
level = -1
if level < 0:
return False
return (len(self.events) < count+1)
def increase_indent(self, flow=False, indentless=False):
self.indents.append(self.indent)
if self.indent is None:
if flow:
self.indent = self.best_indent
else:
self.indent = 0
elif not indentless:
self.indent += self.best_indent
# States.
# Stream handlers.
def expect_stream_start(self):
    """Handle STREAM-START: pick up the event's encoding (only when the
    stream has no `encoding` attribute of its own), write the stream start
    and move on to the first document."""
    if not isinstance(self.event, StreamStartEvent):
        raise EmitterError("expected StreamStartEvent, but got %s"
                % self.event)
    if self.event.encoding and not hasattr(self.stream, 'encoding'):
        self.encoding = self.event.encoding
    self.write_stream_start()
    self.state = self.expect_first_document_start
def expect_nothing(self):
    """Terminal state: any further event is an error."""
    message = "expected nothing, but got %s" % self.event
    raise EmitterError(message)
# Document handlers.
def expect_first_document_start(self):
return self.expect_document_start(first=True)
def expect_document_start(self, first=False):
    """Handle DOCUMENT-START (write directives and, unless the start may
    stay implicit, the '---' marker) or STREAM-END (close the stream)."""
    event = self.event

    if isinstance(event, DocumentStartEvent):
        # Close an open-ended previous document before any directive.
        if (event.version or event.tags) and self.open_ended:
            self.write_indicator('...', True)
            self.write_indent()
        if event.version:
            self.write_version_directive(
                    self.prepare_version(event.version))
        # Every document starts from the default prefixes.
        self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
        if event.tags:
            for handle in sorted(event.tags.keys()):
                prefix = event.tags[handle]
                self.tag_prefixes[prefix] = handle
                handle_text = self.prepare_tag_handle(handle)
                prefix_text = self.prepare_tag_prefix(prefix)
                self.write_tag_directive(handle_text, prefix_text)
        # Only the first document may omit '---', and only when nothing
        # forces it to be explicit.
        implicit = (first and not event.explicit and not self.canonical
                and not event.version and not event.tags
                and not self.check_empty_document())
        if not implicit:
            self.write_indent()
            self.write_indicator('---', True)
            if self.canonical:
                self.write_indent()
        self.state = self.expect_document_root
        return

    if isinstance(event, StreamEndEvent):
        if self.open_ended:
            self.write_indicator('...', True)
            self.write_indent()
        self.write_stream_end()
        self.state = self.expect_nothing
        return

    raise EmitterError("expected DocumentStartEvent, but got %s"
            % self.event)
def expect_document_end(self):
    """Handle DOCUMENT-END: finish the line, write '...' when the end is
    explicit, flush the stream and wait for the next document."""
    if not isinstance(self.event, DocumentEndEvent):
        raise EmitterError("expected DocumentEndEvent, but got %s"
                % self.event)
    self.write_indent()
    if self.event.explicit:
        self.write_indicator('...', True)
        self.write_indent()
    self.flush_stream()
    self.state = self.expect_document_start
def expect_document_root(self):
self.states.append(self.expect_document_end)
self.expect_node(root=True)
# Node handlers.
def expect_node(self, root=False, sequence=False, mapping=False,
        simple_key=False):
    """Record the context flags, then dispatch the current event to the
    alias, scalar, sequence or mapping handler."""
    self.root_context = root
    self.sequence_context = sequence
    self.mapping_context = mapping
    self.simple_key_context = simple_key

    event = self.event
    if isinstance(event, AliasEvent):
        self.expect_alias()
        return
    if not isinstance(event, (ScalarEvent, CollectionStartEvent)):
        raise EmitterError("expected NodeEvent, but got %s" % self.event)

    self.process_anchor('&')
    self.process_tag()
    if isinstance(event, ScalarEvent):
        self.expect_scalar()
    elif isinstance(event, SequenceStartEvent):
        # Flow style is forced inside flow context, in canonical mode, when
        # requested by the event, or for an empty sequence.
        use_flow = (self.flow_level or self.canonical or event.flow_style
                or self.check_empty_sequence())
        if use_flow:
            self.expect_flow_sequence()
        else:
            self.expect_block_sequence()
    elif isinstance(event, MappingStartEvent):
        use_flow = (self.flow_level or self.canonical or event.flow_style
                or self.check_empty_mapping())
        if use_flow:
            self.expect_flow_mapping()
        else:
            self.expect_block_mapping()
def expect_alias(self):
if self.event.anchor is None:
raise EmitterError("anchor is not specified for alias")
self.process_anchor('*')
self.state = self.states.pop()
def expect_scalar(self):
self.increase_indent(flow=True)
self.process_scalar()
self.indent = self.indents.pop()
self.state = self.states.pop()
# Flow sequence handlers.
def expect_flow_sequence(self):
self.write_indicator('[', True, whitespace=True)
self.flow_level += 1
self.increase_indent(flow=True)
self.state = self.expect_first_flow_sequence_item
def expect_first_flow_sequence_item(self):
    """Handle the first event after '[': close an empty sequence, or emit
    the first item (no leading comma)."""
    if isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator(']', False)
        self.state = self.states.pop()
        return
    # Break the line in canonical mode or once past the preferred width.
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    self.states.append(self.expect_flow_sequence_item)
    self.expect_node(sequence=True)
def expect_flow_sequence_item(self):
    """Handle an event after at least one item: close the sequence with
    ']' or write ',' and emit the next item."""
    if isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        # Canonical output ends with a trailing comma on its own line.
        if self.canonical:
            self.write_indicator(',', False)
            self.write_indent()
        self.write_indicator(']', False)
        self.state = self.states.pop()
        return
    self.write_indicator(',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    self.states.append(self.expect_flow_sequence_item)
    self.expect_node(sequence=True)
# Flow mapping handlers.
def expect_flow_mapping(self):
self.write_indicator('{', True, whitespace=True)
self.flow_level += 1
self.increase_indent(flow=True)
self.state = self.expect_first_flow_mapping_key
def expect_first_flow_mapping_key(self):
    """Handle the first event after '{': close an empty mapping, or emit
    the first key (no leading comma)."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator('}', False)
        self.state = self.states.pop()
        return
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    # Canonical output never uses simple keys; check_simple_key is only
    # consulted when not canonical.
    if self.canonical or not self.check_simple_key():
        self.write_indicator('?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
def expect_flow_mapping_key(self):
    """Handle an event after at least one pair: close the mapping with '}'
    or write ',' and emit the next key."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        # Canonical output ends with a trailing comma on its own line.
        if self.canonical:
            self.write_indicator(',', False)
            self.write_indent()
        self.write_indicator('}', False)
        self.state = self.states.pop()
        return
    self.write_indicator(',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    # Canonical output never uses simple keys; check_simple_key is only
    # consulted when not canonical.
    if self.canonical or not self.check_simple_key():
        self.write_indicator('?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
def expect_flow_mapping_simple_value(self):
self.write_indicator(':', False)
self.states.append(self.expect_flow_mapping_key)
self.expect_node(mapping=True)
def expect_flow_mapping_value(self):
if self.canonical or self.column > self.best_width:
self.write_indent()
self.write_indicator(':', True)
self.states.append(self.expect_flow_mapping_key)
self.expect_node(mapping=True)
# Block sequence handlers.
def expect_block_sequence(self):
indentless = (self.mapping_context and not self.indention)
self.increase_indent(flow=False, indentless=indentless)
self.state = self.expect_first_block_sequence_item
def expect_first_block_sequence_item(self):
return self.expect_block_sequence_item(first=True)
def expect_block_sequence_item(self, first=False):
if not first and isinstance(self.event, SequenceEndEvent):
self.indent = self.indents.pop()
self.state = self.states.pop()
else:
self.write_indent()
self.write_indicator('-', True, indention=True)
self.states.append(self.expect_block_sequence_item)
self.expect_node(sequence=True)
# Block mapping handlers.
def expect_block_mapping(self):
self.increase_indent(flow=False)
self.state = self.expect_first_block_mapping_key
def expect_first_block_mapping_key(self):
return self.expect_block_mapping_key(first=True)
def expect_block_mapping_key(self, first=False):
if not first and isinstance(self.event, MappingEndEvent):
self.indent = self.indents.pop()
self.state = self.states.pop()
else:
self.write_indent()
if self.check_simple_key():
self.states.append(self.expect_block_mapping_simple_value)
self.expect_node(mapping=True, simple_key=True)
else:
self.write_indicator('?', True, indention=True)
self.states.append(self.expect_block_mapping_value)
self.expect_node(mapping=True)
def expect_block_mapping_simple_value(self):
self.write_indicator(':', False)
self.states.append(self.expect_block_mapping_key)
self.expect_node(mapping=True)
def expect_block_mapping_value(self):
self.write_indent()
self.write_indicator(':', True, indention=True)
self.states.append(self.expect_block_mapping_key)
self.expect_node(mapping=True)
# Checkers.
def check_empty_sequence(self):
    """Truthy when the current event opens a sequence and the next queued
    event closes it.  The result is not always a bool: an empty queue is
    returned as-is."""
    if not isinstance(self.event, SequenceStartEvent):
        return False
    return self.events and isinstance(self.events[0], SequenceEndEvent)
def check_empty_mapping(self):
    """Truthy when the current event opens a mapping and the next queued
    event closes it.  The result is not always a bool: an empty queue is
    returned as-is."""
    if not isinstance(self.event, MappingStartEvent):
        return False
    return self.events and isinstance(self.events[0], MappingEndEvent)
def check_empty_document(self):
    """Truthy when the current event starts a document whose next queued
    event is an implicit, untagged, unanchored scalar with value ''."""
    if not isinstance(self.event, DocumentStartEvent):
        return False
    if not self.events:
        return False
    upcoming = self.events[0]
    return (isinstance(upcoming, ScalarEvent) and upcoming.anchor is None
            and upcoming.tag is None and upcoming.implicit
            and upcoming.value == '')
def check_simple_key(self):
    """Decide whether the current event can be written as a simple key.

    The anchor, tag and scalar text (prepared and cached here as a side
    effect) must total fewer than 128 characters, and the event must be an
    alias, a non-empty single-line scalar, or an empty collection.
    """
    event = self.event
    length = 0
    if isinstance(event, NodeEvent) and event.anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(event.anchor)
        length += len(self.prepared_anchor)
    if isinstance(event, (ScalarEvent, CollectionStartEvent)) \
            and event.tag is not None:
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(event.tag)
        length += len(self.prepared_tag)
    if isinstance(event, ScalarEvent):
        if self.analysis is None:
            self.analysis = self.analyze_scalar(event.value)
        length += len(self.analysis.scalar)
    if length >= 128:
        return False
    return (isinstance(self.event, AliasEvent)
        or (isinstance(self.event, ScalarEvent)
                and not self.analysis.empty and not self.analysis.multiline)
        or self.check_empty_sequence() or self.check_empty_mapping())
# Anchor, Tag, and Scalar processors.
def process_anchor(self, indicator):
if self.event.anchor is None:
self.prepared_anchor = None
return
if self.prepared_anchor is None:
self.prepared_anchor = self.prepare_anchor(self.event.anchor)
if self.prepared_anchor:
self.write_indicator(indicator+self.prepared_anchor, True)
self.prepared_anchor = None
def process_tag(self):
    """Write the current event's tag unless it may be left implicit, and
    clear the cached prepared tag either way.

    Raises EmitterError when a tag is required but the event has none.
    """
    event = self.event
    tag = event.tag
    # Outside canonical mode (or with no tag at all) the tag may be dropped
    # when the event says it is implicit.
    tag_is_optional = (not self.canonical or tag is None)
    if isinstance(event, ScalarEvent):
        if self.style is None:
            self.style = self.choose_scalar_style()
        # implicit[0] applies to plain scalars, implicit[1] to the rest.
        if tag_is_optional and (event.implicit[0] if self.style == ''
                else event.implicit[1]):
            self.prepared_tag = None
            return
        if event.implicit[0] and tag is None:
            tag = '!'
            self.prepared_tag = None
    elif tag_is_optional and event.implicit:
        self.prepared_tag = None
        return
    if tag is None:
        raise EmitterError("tag is not specified")
    if self.prepared_tag is None:
        self.prepared_tag = self.prepare_tag(tag)
    if self.prepared_tag:
        self.write_indicator(self.prepared_tag, True)
    self.prepared_tag = None
def choose_scalar_style(self):
if self.analysis is None:
self.analysis = self.analyze_scalar(self.event.value)
if self.event.style == '"' or self.canonical:
return '"'
if not self.event.style and self.event.implicit[0]:
if (not (self.simple_key_context and
(self.analysis.empty or self.analysis.multiline))
and (self.flow_level and self.analysis.allow_flow_plain
or (not self.flow_level and self.analysis.allow_block_plain))):
return ''
if self.event.style and self.event.style in '|>':
if (not self.flow_level and not self.simple_key_context
and self.analysis.allow_block):
return self.event.style
if not self.event.style or self.event.style == '\'':
if (self.analysis.allow_single_quoted and
not (self.simple_key_context and self.analysis.multiline)):
return '\''
return '"'
def process_scalar(self):
if self.analysis is None:
self.analysis = self.analyze_scalar(self.event.value)
if self.style is None:
self.style = self.choose_scalar_style()
split = (not self.simple_key_context)
#if self.analysis.multiline and split \
# and (not self.style or self.style in '\'\"'):
# self.write_indent()
if self.style == '"':
self.write_double_quoted(self.analysis.scalar, split)
elif self.style == '\'':
self.write_single_quoted(self.analysis.scalar, split)
elif self.style == '>':
self.write_folded(self.analysis.scalar)
elif self.style == '|':
self.write_literal(self.analysis.scalar)
else:
self.write_plain(self.analysis.scalar, split)
self.analysis = None
self.style = None
# Analyzers.
def prepare_version(self, version):
major, minor = version
if major != 1:
raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
return '%d.%d' % (major, minor)
def prepare_tag_handle(self, handle):
if not handle:
raise EmitterError("tag handle must not be empty")
if handle[0] != '!' or handle[-1] != '!':
raise EmitterError("tag handle must start and end with '!': %r" % handle)
for ch in handle[1:-1]:
if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-_'):
raise EmitterError("invalid character %r in the tag handle: %r"
% (ch, handle))
return handle
def prepare_tag_prefix(self, prefix):
if not prefix:
raise EmitterError("tag prefix must not be empty")
chunks = []
start = end = 0
if prefix[0] == '!':
end = 1
while end < len(prefix):
ch = prefix[end]
if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-;/?!:@&=+$,_.~*\'()[]':
end += 1
else:
if start < end:
chunks.append(prefix[start:end])
start = end = end+1
data = ch.encode('utf-8')
for ch in data:
chunks.append('%%%02X' % ord(ch))
if start < end:
chunks.append(prefix[start:end])
return ''.join(chunks)
def prepare_tag(self, tag):
if not tag:
raise EmitterError("tag must not be empty")
if tag == '!':
return tag
handle = None
suffix = tag
prefixes = sorted(self.tag_prefixes.keys())
for prefix in prefixes:
if tag.startswith(prefix) \
and (prefix == '!' or len(prefix) < len(tag)):
handle = self.tag_prefixes[prefix]
suffix = tag[len(prefix):]
chunks = []
start = end = 0
while end < len(suffix):
ch = suffix[end]
if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-;/?:@&=+$,_.~*\'()[]' \
or (ch == '!' and handle != '!'):
end += 1
else:
if start < end:
chunks.append(suffix[start:end])
start = end = end+1
data = ch.encode('utf-8')
for ch in data:
chunks.append('%%%02X' % ch)
if start < end:
chunks.append(suffix[start:end])
suffix_text = ''.join(chunks)
if handle:
return '%s%s' % (handle, suffix_text)
else:
return '!<%s>' % suffix_text
def prepare_anchor(self, anchor):
if not anchor:
raise EmitterError("anchor must not be empty")
for ch in anchor:
if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-_'):
raise EmitterError("invalid character %r in the anchor: %r"
% (ch, anchor))
return anchor
def analyze_scalar(self, scalar):
    """Inspect `scalar` and return a ScalarAnalysis describing which output
    styles may represent it.

    The scan records indicator characters, line breaks, characters that
    need escaping (non-printable, or non-ASCII when allow_unicode is off)
    and significant whitespace combinations, then derives the `allow_*`
    flags from them.
    """

    # Empty scalar is a special case.
    if not scalar:
        return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                allow_flow_plain=False, allow_block_plain=True,
                allow_single_quoted=True, allow_double_quoted=True,
                allow_block=False)

    # Indicators and special characters.
    block_indicators = False
    flow_indicators = False
    line_breaks = False
    special_characters = False

    # Important whitespace combinations.
    leading_space = False
    leading_break = False
    trailing_space = False
    trailing_break = False
    break_space = False
    space_break = False

    # Check document indicators.
    if scalar.startswith('---') or scalar.startswith('...'):
        block_indicators = True
        flow_indicators = True

    # First character or preceded by a whitespace.
    preceded_by_whitespace = True

    # Last character or followed by a whitespace.
    followed_by_whitespace = (len(scalar) == 1 or
            scalar[1] in '\0 \t\r\n\x85\u2028\u2029')

    # The previous character is a space.
    previous_space = False

    # The previous character is a break.
    previous_break = False

    index = 0
    while index < len(scalar):
        ch = scalar[index]

        # Check for indicators.
        if index == 0:
            # Leading indicators are special characters.
            if ch in '#,[]{}&*!|>\'\"%@`':
                flow_indicators = True
                block_indicators = True
            if ch in '?:':
                flow_indicators = True
                if followed_by_whitespace:
                    block_indicators = True
            if ch == '-' and followed_by_whitespace:
                flow_indicators = True
                block_indicators = True
        else:
            # Some indicators cannot appear within a scalar as well.
            if ch in ',?[]{}':
                flow_indicators = True
            if ch == ':':
                flow_indicators = True
                if followed_by_whitespace:
                    block_indicators = True
            if ch == '#' and preceded_by_whitespace:
                flow_indicators = True
                block_indicators = True

        # Check for line breaks, special, and unicode characters.
        if ch in '\n\x85\u2028\u2029':
            line_breaks = True
        if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
            if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
                    or '\uE000' <= ch <= '\uFFFD'
                    or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF':
                # Printable non-ASCII: special only when unicode output is
                # not allowed.
                if not self.allow_unicode:
                    special_characters = True
            else:
                special_characters = True

        # Detect important whitespace combinations.
        if ch == ' ':
            if index == 0:
                leading_space = True
            if index == len(scalar)-1:
                trailing_space = True
            if previous_break:
                break_space = True
            previous_space = True
            previous_break = False
        elif ch in '\n\x85\u2028\u2029':
            if index == 0:
                leading_break = True
            if index == len(scalar)-1:
                trailing_break = True
            if previous_space:
                space_break = True
            previous_space = False
            previous_break = True
        else:
            previous_space = False
            previous_break = False

        # Prepare for the next character.
        index += 1
        preceded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029')
        followed_by_whitespace = (index+1 >= len(scalar) or
                scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029')

    # Let's decide what styles are allowed.
    allow_flow_plain = True
    allow_block_plain = True
    allow_single_quoted = True
    allow_double_quoted = True
    allow_block = True

    # Leading and trailing whitespaces are bad for plain scalars.
    if (leading_space or leading_break
            or trailing_space or trailing_break):
        allow_flow_plain = allow_block_plain = False

    # We do not permit trailing spaces for block scalars.
    if trailing_space:
        allow_block = False

    # Spaces at the beginning of a new line are only acceptable for block
    # scalars.
    if break_space:
        allow_flow_plain = allow_block_plain = allow_single_quoted = False

    # Spaces followed by breaks, as well as special character are only
    # allowed for double quoted scalars.
    if space_break or special_characters:
        allow_flow_plain = allow_block_plain =  \
        allow_single_quoted = allow_block = False

    # Although the plain scalar writer supports breaks, we never emit
    # multiline plain scalars.
    if line_breaks:
        allow_flow_plain = allow_block_plain = False

    # Flow indicators are forbidden for flow plain scalars.
    if flow_indicators:
        allow_flow_plain = False

    # Block indicators are forbidden for block plain scalars.
    if block_indicators:
        allow_block_plain = False

    return ScalarAnalysis(scalar=scalar,
            empty=False, multiline=line_breaks,
            allow_flow_plain=allow_flow_plain,
            allow_block_plain=allow_block_plain,
            allow_single_quoted=allow_single_quoted,
            allow_double_quoted=allow_double_quoted,
            allow_block=allow_block)
# Writers.
def flush_stream(self):
if hasattr(self.stream, 'flush'):
self.stream.flush()
def write_stream_start(self):
# Write BOM if needed.
if self.encoding and self.encoding.startswith('utf-16'):
self.stream.write('\uFEFF'.encode(self.encoding))
def write_stream_end(self):
self.flush_stream()
def write_indicator(self, indicator, need_whitespace,
whitespace=False, indention=False):
if self.whitespace or not need_whitespace:
data = indicator
else:
data = ' '+indicator
self.whitespace = whitespace
self.indention = self.indention and indention
self.column += len(data)
self.open_ended = False
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
def write_indent(self):
indent = self.indent or 0
if not self.indention or self.column > indent \
or (self.column == indent and not self.whitespace):
self.write_line_break()
if self.column < indent:
self.whitespace = True
data = ' '*(indent-self.column)
self.column = indent
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
def write_line_break(self, data=None):
if data is None:
data = self.best_line_break
self.whitespace = True
self.indention = True
self.line += 1
self.column = 0
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
def write_version_directive(self, version_text):
data = '%%YAML %s' % version_text
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
self.write_line_break()
def write_tag_directive(self, handle_text, prefix_text):
data = '%%TAG %s %s' % (handle_text, prefix_text)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
self.write_line_break()
# Scalar streams.
def write_single_quoted(self, text, split=True):
    """Write `text` as a single-quoted scalar.

    The text is scanned one position past its end (ch is None there) so the
    final run is flushed.  Runs of spaces, runs of line breaks and runs of
    other characters are written separately; each "'" is written doubled.
    When `split` is true, a single space between words may be replaced by a
    line break once the column exceeds best_width.
    """
    self.write_indicator('\'', True)
    # Kind of run that text[start:end] currently belongs to.
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            # A run of spaces ends here.
            if ch is None or ch != ' ':
                # Fold only at a lone interior space.
                if start+1 == end and self.column > self.best_width and split   \
                        and start != 0 and end != len(text):
                    self.write_indent()
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        elif breaks:
            # A run of line breaks ends here.
            if ch is None or ch not in '\n\x85\u2028\u2029':
                # One extra break is written when the run starts with '\n'.
                if text[start] == '\n':
                    self.write_line_break()
                for br in text[start:end]:
                    # '\n' becomes the preferred break; other break
                    # characters are written as they are.
                    if br == '\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                start = end
        else:
            # A run of ordinary characters ends at a space, a break, a
            # quote or the end of the text.
            if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'':
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end
        if ch == '\'':
            # Write the quote doubled and skip past it.
            data = '\'\''
            self.column += 2
            if self.encoding:
                data = data.encode(self.encoding)
            self.stream.write(data)
            start = end + 1
        if ch is not None:
            spaces = (ch == ' ')
            breaks = (ch in '\n\x85\u2028\u2029')
        end += 1
    self.write_indicator('\'', False)
# Character -> letter written after a backslash by write_double_quoted.
# Characters that need escaping but are not listed here are written as
# \xXX, \uXXXX or \UXXXXXXXX escapes instead.
ESCAPE_REPLACEMENTS = {
    '\0':       '0',
    '\x07':     'a',
    '\x08':     'b',
    '\x09':     't',
    '\x0A':     'n',
    '\x0B':     'v',
    '\x0C':     'f',
    '\x0D':     'r',
    '\x1B':     'e',
    '\"':       '\"',
    '\\':       '\\',
    '\x85':     'N',
    '\xA0':     '_',
    '\u2028':   'L',
    '\u2029':   'P',
}
def write_double_quoted(self, text, split=True):
    """Write `text` as a double-quoted scalar.

    Printable ASCII (and, with allow_unicode, characters in the ranges
    U+00A0-U+D7FF and U+E000-U+FFFD) is copied through in runs; '"', '\\',
    U+0085, U+2028, U+2029, U+FEFF and everything else is escaped.  When
    `split` is true, long lines are folded with a trailing backslash.
    """
    self.write_indicator('"', True)
    start = end = 0
    # Scan one position past the end (ch is None) to flush the last run.
    while end <= len(text):
        ch = None
        if end < len(text):
            ch = text[end]
        # At the end of the text or at a character that must be escaped.
        if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \
                or not ('\x20' <= ch <= '\x7E'
                    or (self.allow_unicode
                        and ('\xA0' <= ch <= '\uD7FF'
                            or '\uE000' <= ch <= '\uFFFD'))):
            # Flush the literal run collected so far.
            if start < end:
                data = text[start:end]
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end
            if ch is not None:
                # Prefer a named escape; otherwise use the shortest
                # numeric escape that fits the code point.
                if ch in self.ESCAPE_REPLACEMENTS:
                    data = '\\'+self.ESCAPE_REPLACEMENTS[ch]
                elif ch <= '\xFF':
                    data = '\\x%02X' % ord(ch)
                elif ch <= '\uFFFF':
                    data = '\\u%04X' % ord(ch)
                else:
                    data = '\\U%08X' % ord(ch)
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end+1
        # Fold the line at a space, or right after an escape, once the
        # pending output would pass best_width; never at the first or last
        # character.
        if 0 < end < len(text)-1 and (ch == ' ' or start >= end)    \
                and self.column+(end-start) > self.best_width and split:
            # The pending run is written followed by a backslash that
            # escapes the line break.
            data = text[start:end]+'\\'
            if start < end:
                start = end
            self.column += len(data)
            if self.encoding:
                data = data.encode(self.encoding)
            self.stream.write(data)
            self.write_indent()
            self.whitespace = False
            self.indention = False
            # A backslash is written before a space that starts the
            # continuation line.
            if text[start] == ' ':
                data = '\\'
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
        end += 1
    self.write_indicator('"', False)
def determine_block_hints(self, text):
    """Return the header hints to place after a block scalar indicator.

    The result concatenates up to two parts:

    * ``str(self.best_indent)`` when ``text`` starts with a space or a
      line break;
    * ``'-'`` when ``text`` does not end with a line break, or ``'+'`` when
      it is a single line break or ends with two consecutive line breaks.

    Empty ``text`` produces ``''``.
    """
    if not text:
        return ''
    pieces = []
    first, last = text[0], text[-1]
    if first in ' \n\x85\u2028\u2029':
        pieces.append(str(self.best_indent))
    if last not in '\n\x85\u2028\u2029':
        pieces.append('-')
    elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029':
        pieces.append('+')
    return ''.join(pieces)
# Write `text` as a folded block scalar: the '>' indicator plus the hints
# from determine_block_hints, a line break, then the content.
#
# The content is scanned character by character in three modes, chosen by
# what the previous character was: a line break (`breaks`), a space
# (`spaces`), or anything else.
def write_folded(self, text):
hints = self.determine_block_hints(text)
self.write_indicator('>'+hints, True)
# A trailing '+' hint marks the scalar as open ended.
if hints[-1:] == '+':
self.open_ended = True
self.write_line_break()
# The scan starts as if a line break had just been seen.
leading_space = True
spaces = False
breaks = True
# text[start:end] is the run of same-mode characters not yet written.
start = end = 0
# One extra iteration with ch = None flushes the final run.
while end <= len(text):
ch = None
if end < len(text):
ch = text[end]
if breaks:
# End of a run of line breaks.
if ch is None or ch not in '\n\x85\u2028\u2029':
# An additional line break is written when the run began with '\n', the
# preceding and the following line both start with a non-space character,
# and the text has not ended.
if not leading_space and ch is not None and ch != ' ' \
and text[start] == '\n':
self.write_line_break()
leading_space = (ch == ' ')
# '\n' uses the emitter's default line break; other break characters are
# written as themselves.
for br in text[start:end]:
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
if ch is not None:
self.write_indent()
start = end
elif spaces:
# End of a run of spaces.
if ch != ' ':
# A single space past self.best_width becomes a fold (new indented line);
# any other run of spaces is written verbatim.
if start+1 == end and self.column > self.best_width:
self.write_indent()
else:
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end
else:
# End of a run of ordinary characters: write it out.
if ch is None or ch in ' \n\x85\u2028\u2029':
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
# Text that ends without a line break is terminated with one.
if ch is None:
self.write_line_break()
start = end
# Select the mode for the next character.
if ch is not None:
breaks = (ch in '\n\x85\u2028\u2029')
spaces = (ch == ' ')
end += 1
# Write `text` as a literal block scalar: the '|' indicator plus the hints
# from determine_block_hints, a line break, then the content with its line
# breaks reproduced one for one.
def write_literal(self, text):
hints = self.determine_block_hints(text)
self.write_indicator('|'+hints, True)
# A trailing '+' hint marks the scalar as open ended.
if hints[-1:] == '+':
self.open_ended = True
self.write_line_break()
# The scan starts as if a line break had just been seen.
breaks = True
# text[start:end] is the run (of breaks or of non-breaks) not yet written.
start = end = 0
# One extra iteration with ch = None flushes the final run.
while end <= len(text):
ch = None
if end < len(text):
ch = text[end]
if breaks:
# End of a run of line breaks: '\n' uses the emitter's default line break,
# other break characters are written as themselves.
if ch is None or ch not in '\n\x85\u2028\u2029':
for br in text[start:end]:
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
if ch is not None:
self.write_indent()
start = end
else:
# End of a run of non-break characters: write it verbatim.
# Unlike the other writers, self.column is not advanced here.
if ch is None or ch in '\n\x85\u2028\u2029':
data = text[start:end]
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
# Text that ends without a line break is terminated with one.
if ch is None:
self.write_line_break()
start = end
if ch is not None:
breaks = (ch in '\n\x85\u2028\u2029')
end += 1
# Write `text` as a plain (unquoted) scalar.
#
# Nothing is written for empty text. A separating space is written first
# unless self.whitespace is already set. With `split` true, a single space
# reached after the column has passed self.best_width is replaced by a new
# indented line.
def write_plain(self, text, split=True):
# In the root context the scalar is flagged as open ended, even when the
# text turns out to be empty.
if self.root_context:
self.open_ended = True
if not text:
return
if not self.whitespace:
data = ' '
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
self.whitespace = False
self.indention = False
# Mode flags describing the previous character.
spaces = False
breaks = False
# text[start:end] is the run of same-mode characters not yet written.
start = end = 0
# One extra iteration with ch = None flushes the final run.
while end <= len(text):
ch = None
if end < len(text):
ch = text[end]
if spaces:
# End of a run of spaces: fold a lone space past best_width, otherwise
# write the spaces verbatim.
if ch != ' ':
if start+1 == end and self.column > self.best_width and split:
self.write_indent()
self.whitespace = False
self.indention = False
else:
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end
elif breaks:
# NOTE(review): assuming this test sits in the `breaks` branch, ch is None
# here whenever `text` ends with a line break, and `None not in <str>`
# raises TypeError. Confirm that callers never pass such text.
if ch not in '\n\x85\u2028\u2029':
# A run starting with '\n' gets one additional line break in front.
if text[start] == '\n':
self.write_line_break()
for br in text[start:end]:
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
self.write_indent()
self.whitespace = False
self.indention = False
start = end
else:
# End of a run of ordinary characters: write it out.
if ch is None or ch in ' \n\x85\u2028\u2029':
data = text[start:end]
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end
# Select the mode for the next character.
if ch is not None:
spaces = (ch == ' ')
breaks = (ch in '\n\x85\u2028\u2029')
end += 1
================================================
FILE: metaflow/_vendor/yaml/error.py
================================================
__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError']
class Mark:
    """A position inside a named source, used to build error messages.

    ``buffer`` and ``pointer`` are optional: when ``buffer`` is ``None`` no
    source snippet can be rendered and only the location line is shown.
    """

    def __init__(self, name, index, line, column, buffer, pointer):
        self.name = name
        self.index = index
        self.line = line
        self.column = column
        self.buffer = buffer
        self.pointer = pointer

    def get_snippet(self, indent=4, max_length=75):
        """Return the source line around ``pointer`` with a caret below it.

        At most roughly ``max_length/2`` characters are kept on each side of
        the pointer; a side that had to be cut is marked with ``' ... '``.
        Both lines are prefixed by ``indent`` spaces. Returns ``None`` when
        there is no buffer.
        """
        source = self.buffer
        if source is None:
            return None
        limit = max_length/2-1

        # Walk left to the beginning of the line, or until too far away.
        prefix = ''
        left = self.pointer
        while left > 0 and source[left-1] not in '\0\r\n\x85\u2028\u2029':
            left -= 1
            if self.pointer-left > limit:
                prefix = ' ... '
                left += 5
                break

        # Walk right to the end of the line, or until too far away.
        suffix = ''
        right = self.pointer
        while right < len(source) and source[right] not in '\0\r\n\x85\u2028\u2029':
            right += 1
            if right-self.pointer > limit:
                suffix = ' ... '
                right -= 5
                break

        text_line = ' '*indent + prefix + source[left:right] + suffix
        caret_line = ' '*(indent+self.pointer-left+len(prefix)) + '^'
        return text_line + '\n' + caret_line

    def __str__(self):
        snippet = self.get_snippet()
        location = " in \"%s\", line %d, column %d" \
                % (self.name, self.line+1, self.column+1)
        if snippet is None:
            return location
        return location + ":\n" + snippet
class YAMLError(Exception):
    """Common base class for the YAML exceptions (e.g. MarkedYAMLError)."""
class MarkedYAMLError(YAMLError):
    """YAML error that can carry a context, a problem, their marks and a note.

    ``str()`` joins the parts that are not ``None``, one per line, in the
    order: context, context mark, problem, problem mark, note.
    """

    def __init__(self, context=None, context_mark=None,
            problem=None, problem_mark=None, note=None):
        self.context = context
        self.context_mark = context_mark
        self.problem = problem
        self.problem_mark = problem_mark
        self.note = note

    def __str__(self):
        parts = []
        if self.context is not None:
            parts.append(self.context)
        if self.context_mark is not None:
            # The context mark is left out when a problem is reported at
            # exactly the same name, line and column.
            duplicate = (self.problem is not None
                    and self.problem_mark is not None
                    and self.context_mark.name == self.problem_mark.name
                    and self.context_mark.line == self.problem_mark.line
                    and self.context_mark.column == self.problem_mark.column)
            if not duplicate:
                parts.append(str(self.context_mark))
        if self.problem is not None:
            parts.append(self.problem)
        if self.problem_mark is not None:
            parts.append(str(self.problem_mark))
        if self.note is not None:
            parts.append(self.note)
        return '\n'.join(parts)
================================================
FILE: metaflow/_vendor/yaml/events.py
================================================
# Abstract classes.
class Event(object):
    """Base class of all events; stores the start and end marks."""

    def __init__(self, start_mark=None, end_mark=None):
        self.start_mark = start_mark
        self.end_mark = end_mark

    def __repr__(self):
        # Only these attributes are shown, and only if the instance has them.
        shown = ('anchor', 'tag', 'implicit', 'value')
        pairs = ['%s=%r' % (name, getattr(self, name))
                for name in shown if hasattr(self, name)]
        return '%s(%s)' % (self.__class__.__name__, ', '.join(pairs))


class NodeEvent(Event):
    """Event that carries an anchor."""

    def __init__(self, anchor, start_mark=None, end_mark=None):
        self.anchor = anchor
        self.start_mark = start_mark
        self.end_mark = end_mark


class CollectionStartEvent(NodeEvent):
    """Start of a collection: anchor, tag, implicit flag and flow style."""

    def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None,
            flow_style=None):
        self.anchor = anchor
        self.tag = tag
        self.implicit = implicit
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.flow_style = flow_style


class CollectionEndEvent(Event):
    """End of a collection."""


# Implementations.

class StreamStartEvent(Event):
    """Start of the stream; additionally records the encoding."""

    def __init__(self, start_mark=None, end_mark=None, encoding=None):
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.encoding = encoding


class StreamEndEvent(Event):
    """End of the stream."""


class DocumentStartEvent(Event):
    """Start of a document, with its explicit flag, version and tags."""

    def __init__(self, start_mark=None, end_mark=None,
            explicit=None, version=None, tags=None):
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.explicit = explicit
        self.version = version
        self.tags = tags


class DocumentEndEvent(Event):
    """End of a document, with its explicit flag."""

    def __init__(self, start_mark=None, end_mark=None,
            explicit=None):
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.explicit = explicit


class AliasEvent(NodeEvent):
    """Alias event; the anchor is the only payload."""


class ScalarEvent(NodeEvent):
    """Scalar event: anchor, tag, implicit flags, value and style."""

    def __init__(self, anchor, tag, implicit, value,
            start_mark=None, end_mark=None, style=None):
        self.anchor = anchor
        self.tag = tag
        self.implicit = implicit
        self.value = value
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.style = style


class SequenceStartEvent(CollectionStartEvent):
    """Start of a sequence."""


class SequenceEndEvent(CollectionEndEvent):
    """End of a sequence."""


class MappingStartEvent(CollectionStartEvent):
    """Start of a mapping."""


class MappingEndEvent(CollectionEndEvent):
    """End of a mapping."""
================================================
FILE: metaflow/_vendor/yaml/loader.py
================================================
__all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader']
from .reader import *
from .scanner import *
from .parser import *
from .composer import *
from .constructor import *
from .resolver import *
class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver):
    """Loader combining the reader, scanner, parser and composer with
    BaseConstructor and BaseResolver."""

    def __init__(self, stream):
        # Only the reader takes the input stream; the other stages are
        # initialised without arguments, in pipeline order.
        Reader.__init__(self, stream)
        for stage in (Scanner, Parser, Composer, BaseConstructor, BaseResolver):
            stage.__init__(self)
class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver):
    """Loader combining the reader, scanner, parser and composer with
    FullConstructor and Resolver."""

    def __init__(self, stream):
        # Only the reader takes the input stream; the other stages are
        # initialised without arguments, in pipeline order.
        Reader.__init__(self, stream)
        for stage in (Scanner, Parser, Composer, FullConstructor, Resolver):
            stage.__init__(self)
class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):
    """Loader combining the reader, scanner, parser and composer with
    SafeConstructor and Resolver."""

    def __init__(self, stream):
        # Only the reader takes the input stream; the other stages are
        # initialised without arguments, in pipeline order.
        Reader.__init__(self, stream)
        for stage in (Scanner, Parser, Composer, SafeConstructor, Resolver):
            stage.__init__(self)
class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
    """Loader combining the reader, scanner, parser and composer with
    Constructor and Resolver."""

    def __init__(self, stream):
        # Only the reader takes the input stream; the other stages are
        # initialised without arguments, in pipeline order.
        Reader.__init__(self, stream)
        for stage in (Scanner, Parser, Composer, Constructor, Resolver):
            stage.__init__(self)
# UnsafeLoader is the same as Loader (which is and was always unsafe on
# untrusted input). Use of either Loader or UnsafeLoader should be rare, since
# FullLoad should be able to load almost all YAML safely. Loader is left intact
# to ensure backwards compatibility.
class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
    """Loader with the same bases and initialisation as ``Loader``."""

    def __init__(self, stream):
        # Only the reader takes the input stream; the other stages are
        # initialised without arguments, in pipeline order.
        Reader.__init__(self, stream)
        for stage in (Scanner, Parser, Composer, Constructor, Resolver):
            stage.__init__(self)
================================================
FILE: metaflow/_vendor/yaml/nodes.py
================================================
class Node(object):
    """Common base of the node classes: a tag, a value and two marks."""

    def __init__(self, tag, value, start_mark, end_mark):
        self.tag = tag
        self.value = value
        self.start_mark = start_mark
        self.end_mark = end_mark

    def __repr__(self):
        # The value is always rendered in full with repr(); no abbreviation
        # of long values or large collections is applied.
        return '%s(tag=%r, value=%s)' % (
                self.__class__.__name__, self.tag, repr(self.value))


class ScalarNode(Node):
    """Node with ``id`` 'scalar'; additionally stores a style."""

    id = 'scalar'

    def __init__(self, tag, value,
            start_mark=None, end_mark=None, style=None):
        self.tag = tag
        self.value = value
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.style = style


class CollectionNode(Node):
    """Base of the collection nodes; additionally stores a flow style."""

    def __init__(self, tag, value,
            start_mark=None, end_mark=None, flow_style=None):
        self.tag = tag
        self.value = value
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.flow_style = flow_style


class SequenceNode(CollectionNode):
    """Collection node with ``id`` 'sequence'."""

    id = 'sequence'


class MappingNode(CollectionNode):
    """Collection node with ``id`` 'mapping'."""

    id = 'mapping'
================================================
FILE: metaflow/_vendor/yaml/parser.py
================================================
# The following YAML grammar is LL(1) and is parsed by a recursive descent
# parser.
#
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
# block_node_or_indentless_sequence ::=
# ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# FIRST sets:
#
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
# flow_mapping: { FLOW-MAPPING-START }
# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
__all__ = ['Parser', 'ParserError']
from .error import MarkedYAMLError
from .tokens import *
from .events import *
from .scanner import *
class ParserError(MarkedYAMLError):
    """Error raised by Parser; adds nothing to MarkedYAMLError."""
class Parser:
# Since writing a recursive-descent parser is a straightforward task, we
# do not give many comments here.
# Tag handles available in every document. parse_implicit_document_start
# installs this table as self.tag_handles, and process_directives copies any
# entry from it that the document's own TAG directives did not define.
DEFAULT_TAGS = {
'!': '!',
'!!': 'tag:yaml.org,2002:',
}
def __init__(self):
self.current_event = None
self.yaml_version = None
self.tag_handles = {}
self.states = []
self.marks = []
self.state = self.parse_stream_start
def dispose(self):
    """Forget the current state and the state stack.

    Both hold bound methods of this object, so clearing them removes the
    self-references.
    """
    self.states, self.state = [], None
def check_event(self, *choices):
    """Tell whether the next event exists and matches one of ``choices``.

    The next event is produced by the current state if none is pending.
    With no ``choices`` any pending event counts as a match. The event is
    not consumed.
    """
    if self.current_event is None and self.state:
        self.current_event = self.state()
    if self.current_event is None:
        return False
    if not choices:
        return True
    return any(isinstance(self.current_event, choice) for choice in choices)
def peek_event(self):
    """Return the next event without consuming it.

    The event is produced by the current state if none is pending; the
    result is ``None`` when there is neither a pending event nor a state.
    """
    if self.current_event is None and self.state:
        self.current_event = self.state()
    return self.current_event
def get_event(self):
    """Return the next event and consume it.

    The event is produced by the current state if none is pending. After
    the call no event is pending, so the next request runs the state again.
    """
    if self.current_event is None and self.state:
        self.current_event = self.state()
    event, self.current_event = self.current_event, None
    return event
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
def parse_stream_start(self):
    """Consume the first token and return the matching StreamStartEvent.

    The next state is ``parse_implicit_document_start``.
    """
    first = self.get_token()
    stream_start = StreamStartEvent(first.start_mark, first.end_mark,
            encoding=first.encoding)
    self.state = self.parse_implicit_document_start
    return stream_start
def parse_implicit_document_start(self):
    """Start a document that has no directives and no explicit start marker.

    When the next token is a directive, a document start or the stream end,
    the work is delegated to ``parse_document_start``. Otherwise a
    zero-width, non-explicit DocumentStartEvent is returned and the parser
    continues with ``parse_block_node`` followed by ``parse_document_end``.
    """
    if self.check_token(DirectiveToken, DocumentStartToken,
            StreamEndToken):
        return self.parse_document_start()
    # The class-level table itself is installed here, not a copy.
    self.tag_handles = self.DEFAULT_TAGS
    here = self.peek_token().start_mark
    doc_start = DocumentStartEvent(here, here, explicit=False)
    self.states.append(self.parse_document_end)
    self.state = self.parse_block_node
    return doc_start
def parse_document_start(self):
    """Parse the start of an explicit document, or the end of the stream.

    Any leading document end tokens are skipped first. If the stream has
    not ended, the directives are processed and a document start token is
    required; the result is an explicit DocumentStartEvent and the parser
    continues with ``parse_document_content`` followed by
    ``parse_document_end``. At the end of the stream a StreamEndEvent is
    returned and the state machine stops (``self.state`` becomes ``None``).

    Raises:
        ParserError: if the directives are not followed by a document
            start token.
    """
    # Skip any extra document end indicators.
    while self.check_token(DocumentEndToken):
        self.get_token()

    if self.check_token(StreamEndToken):
        # End of the stream: nothing may be left open at this point.
        token = self.get_token()
        event = StreamEndEvent(token.start_mark, token.end_mark)
        assert not self.states
        assert not self.marks
        self.state = None
        return event

    # An explicit document.
    token = self.peek_token()
    start_mark = token.start_mark
    version, tags = self.process_directives()
    if not self.check_token(DocumentStartToken):
        # The message names the token that was expected; the placeholder
        # had been lost, leaving an empty pair of quotes.
        raise ParserError(None, None,
                "expected '<document start>', but found %r"
                % self.peek_token().id,
                self.peek_token().start_mark)
    token = self.get_token()
    end_mark = token.end_mark
    event = DocumentStartEvent(start_mark, end_mark,
            explicit=True, version=version, tags=tags)
    self.states.append(self.parse_document_end)
    self.state = self.parse_document_content
    return event
def parse_document_end(self):
    """Return the DocumentEndEvent for the current document.

    The event is explicit, and spans the document end token, only when such
    a token is present; otherwise it is zero-width at the next token. The
    next state is ``parse_document_start``.
    """
    start_mark = self.peek_token().start_mark
    if self.check_token(DocumentEndToken):
        end_mark = self.get_token().end_mark
        explicit = True
    else:
        end_mark = start_mark
        explicit = False
    doc_end = DocumentEndEvent(start_mark, end_mark,
            explicit=explicit)
    self.state = self.parse_document_start
    return doc_end
def parse_document_content(self):
    """Parse the root node of an explicit document.

    A document whose start marker is immediately followed by a directive,
    another document marker or the stream end has no content; an empty
    scalar is produced for it and the parser returns to the saved state.
    """
    if not self.check_token(DirectiveToken,
            DocumentStartToken, DocumentEndToken, StreamEndToken):
        return self.parse_block_node()
    empty = self.process_empty_scalar(self.peek_token().start_mark)
    self.state = self.states.pop()
    return empty
def process_directives(self):
    """Consume the directive tokens that precede a document.

    Resets ``self.yaml_version`` and ``self.tag_handles``, then records the
    YAML and TAG directives found. Afterwards the handles from
    ``DEFAULT_TAGS`` that the document did not define are added to
    ``self.tag_handles``.

    Returns:
        A ``(version, tags)`` pair, where ``tags`` is a copy of the handles
        defined by the document itself, or ``None`` if there were none.

    Raises:
        ParserError: on a repeated YAML directive, a major version other
            than 1, or a repeated tag handle.
    """
    self.yaml_version = None
    self.tag_handles = {}
    while self.check_token(DirectiveToken):
        directive = self.get_token()
        if directive.name == 'YAML':
            if self.yaml_version is not None:
                raise ParserError(None, None,
                        "found duplicate YAML directive", directive.start_mark)
            major, _ = directive.value
            if major != 1:
                raise ParserError(None, None,
                        "found incompatible YAML document (version 1.* is required)",
                        directive.start_mark)
            self.yaml_version = directive.value
        elif directive.name == 'TAG':
            handle, prefix = directive.value
            if handle in self.tag_handles:
                raise ParserError(None, None,
                        "duplicate tag handle %r" % handle,
                        directive.start_mark)
            self.tag_handles[handle] = prefix
    # Snapshot the document's own handles before the defaults are mixed in.
    declared = self.tag_handles.copy() if self.tag_handles else None
    result = self.yaml_version, declared
    for handle in self.DEFAULT_TAGS:
        if handle not in self.tag_handles:
            self.tag_handles[handle] = self.DEFAULT_TAGS[handle]
    return result
# block_node_or_indentless_sequence ::= ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
def parse_block_node(self):
    """Parse a node with block collections allowed."""
    event = self.parse_node(block=True)
    return event
def parse_flow_node(self):
    """Parse a node using parse_node's defaults (no block collections)."""
    event = self.parse_node()
    return event
def parse_block_node_or_indentless_sequence(self):
    """Parse a block node, also accepting an indentless sequence."""
    event = self.parse_node(block=True, indentless_sequence=True)
    return event
# Parse one node and return its first event.
#
# An alias token yields an AliasEvent. Otherwise the optional anchor and tag
# (in either order) are read, the tag handle is expanded, and the event is
# chosen from the next token: a scalar, the start of a flow or block
# collection, or an empty scalar when only properties were given.
#
# `block` allows block sequences and block mappings; `indentless_sequence`
# additionally allows a sequence that starts directly with a block entry
# token. Raises ParserError for an undefined tag handle or when no node
# content is found.
def parse_node(self, block=False, indentless_sequence=False):
if self.check_token(AliasToken):
token = self.get_token()
event = AliasEvent(token.value, token.start_mark, token.end_mark)
# An alias is complete on its own: return to the saved state.
self.state = self.states.pop()
else:
anchor = None
tag = None
# start_mark/end_mark span the properties read so far; tag_mark is where
# the tag begins (used in the error below).
start_mark = end_mark = tag_mark = None
# properties ::= ANCHOR TAG? | TAG ANCHOR?
if self.check_token(AnchorToken):
token = self.get_token()
start_mark = token.start_mark
end_mark = token.end_mark
anchor = token.value
if self.check_token(TagToken):
token = self.get_token()
tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
elif self.check_token(TagToken):
token = self.get_token()
start_mark = tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
if self.check_token(AnchorToken):
token = self.get_token()
end_mark = token.end_mark
anchor = token.value
# The tag token value is a (handle, suffix) pair. A handle is replaced by
# its prefix from self.tag_handles; without a handle the suffix is the tag.
if tag is not None:
handle, suffix = tag
if handle is not None:
if handle not in self.tag_handles:
raise ParserError("while parsing a node", start_mark,
"found undefined tag handle %r" % handle,
tag_mark)
tag = self.tag_handles[handle]+suffix
else:
tag = suffix
#if tag == '!':
# raise ParserError("while parsing a node", start_mark,
# "found non-specific tag '!'", tag_mark,
# "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
# Without properties the node starts at the next token.
if start_mark is None:
start_mark = end_mark = self.peek_token().start_mark
event = None
# For collections `implicit` is a single flag: no tag, or the tag '!'.
implicit = (tag is None or tag == '!')
if indentless_sequence and self.check_token(BlockEntryToken):
# The block entry token is only peeked at here, not consumed.
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark)
self.state = self.parse_indentless_sequence_entry
else:
if self.check_token(ScalarToken):
token = self.get_token()
end_mark = token.end_mark
# For scalars `implicit` is a pair: (True, False) for an untagged plain
# scalar or the tag '!', (False, True) for an untagged non-plain scalar,
# (False, False) when a tag was given.
if (token.plain and tag is None) or tag == '!':
implicit = (True, False)
elif tag is None:
implicit = (False, True)
else:
implicit = (False, False)
event = ScalarEvent(anchor, tag, implicit, token.value,
start_mark, end_mark, style=token.style)
self.state = self.states.pop()
# For collections the start token is left in place; the *_first_entry or
# *_first_key state set below consumes it.
elif self.check_token(FlowSequenceStartToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_sequence_first_entry
elif self.check_token(FlowMappingStartToken):
end_mark = self.peek_token().end_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_mapping_first_key
# Block collections use the start token's start_mark as the end mark.
elif block and self.check_token(BlockSequenceStartToken):
end_mark = self.peek_token().start_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_sequence_first_entry
elif block and self.check_token(BlockMappingStartToken):
end_mark = self.peek_token().start_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_mapping_first_key
elif anchor is not None or tag is not None:
# Empty scalars are allowed even if a tag or an anchor is
# specified.
event = ScalarEvent(anchor, tag, (implicit, False), '',
start_mark, end_mark)
self.state = self.states.pop()
else:
# Neither properties nor content: report what was found instead.
if block:
node = 'block'
else:
node = 'flow'
token = self.peek_token()
raise ParserError("while parsing a %s node" % node, start_mark,
"expected the node content, but found %r" % token.id,
token.start_mark)
return event
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
def parse_block_sequence_first_entry(self):
    """Consume the token that opens a block sequence, remember its start
    mark, and parse the first entry."""
    opening = self.get_token()
    self.marks.append(opening.start_mark)
    return self.parse_block_sequence_entry()
def parse_block_sequence_entry(self):
    """Parse the next entry of a block sequence, or the end of the sequence.

    After a block entry token either the entry's node is parsed or, when
    the entry has no content, an empty scalar is produced. Without a block
    entry token the sequence must end here: a SequenceEndEvent is returned
    and the parser returns to the saved state.

    Raises:
        ParserError: if the sequence is followed by anything other than a
            block end token.
    """
    if self.check_token(BlockEntryToken):
        token = self.get_token()
        if not self.check_token(BlockEntryToken, BlockEndToken):
            self.states.append(self.parse_block_sequence_entry)
            return self.parse_block_node()
        # An entry immediately followed by another entry or the block end
        # has no content.
        self.state = self.parse_block_sequence_entry
        return self.process_empty_scalar(token.end_mark)
    if not self.check_token(BlockEndToken):
        token = self.peek_token()
        # The message names the token that was expected; the placeholder
        # had been lost, leaving "expected , but found".
        raise ParserError("while parsing a block collection", self.marks[-1],
                "expected <block end>, but found %r" % token.id, token.start_mark)
    token = self.get_token()
    event = SequenceEndEvent(token.start_mark, token.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return event
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
def parse_indentless_sequence_entry(self):
    """Parse the next entry of an indentless sequence, or its end.

    The sequence ends at the first token that is not a block entry; the
    SequenceEndEvent is zero-width at that token, which is not consumed.
    """
    if not self.check_token(BlockEntryToken):
        following = self.peek_token()
        closing = SequenceEndEvent(following.start_mark, following.start_mark)
        self.state = self.states.pop()
        return closing
    entry = self.get_token()
    if self.check_token(BlockEntryToken,
            KeyToken, ValueToken, BlockEndToken):
        # The entry has no content.
        self.state = self.parse_indentless_sequence_entry
        return self.process_empty_scalar(entry.end_mark)
    self.states.append(self.parse_indentless_sequence_entry)
    return self.parse_block_node()
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
def parse_block_mapping_first_key(self):
    """Consume the token that opens a block mapping, remember its start
    mark, and parse the first key."""
    opening = self.get_token()
    self.marks.append(opening.start_mark)
    return self.parse_block_mapping_key()
def parse_block_mapping_key(self):
    """Parse the next key of a block mapping, or the end of the mapping.

    After a key token either the key's node is parsed or, when the key has
    no content, an empty scalar is produced; in both cases the value is
    parsed next. Without a key token the mapping must end here: a
    MappingEndEvent is returned and the parser returns to the saved state.

    Raises:
        ParserError: if the mapping is followed by anything other than a
            block end token.
    """
    if self.check_token(KeyToken):
        token = self.get_token()
        if not self.check_token(KeyToken, ValueToken, BlockEndToken):
            self.states.append(self.parse_block_mapping_value)
            return self.parse_block_node_or_indentless_sequence()
        # A key immediately followed by another key, a value indicator or
        # the block end has no content.
        self.state = self.parse_block_mapping_value
        return self.process_empty_scalar(token.end_mark)
    if not self.check_token(BlockEndToken):
        token = self.peek_token()
        # The message names the token that was expected; the placeholder
        # had been lost, leaving "expected , but found".
        raise ParserError("while parsing a block mapping", self.marks[-1],
                "expected <block end>, but found %r" % token.id, token.start_mark)
    token = self.get_token()
    event = MappingEndEvent(token.start_mark, token.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return event
def parse_block_mapping_value(self):
    """Parse the value belonging to the key just parsed in a block mapping.

    A missing value token, or a value token without content, produces an
    empty scalar. The next key is parsed afterwards in every case.
    """
    if not self.check_token(ValueToken):
        # No value indicator at all: empty value at the next token.
        self.state = self.parse_block_mapping_key
        following = self.peek_token()
        return self.process_empty_scalar(following.start_mark)
    indicator = self.get_token()
    if self.check_token(KeyToken, ValueToken, BlockEndToken):
        # Value indicator without content.
        self.state = self.parse_block_mapping_key
        return self.process_empty_scalar(indicator.end_mark)
    self.states.append(self.parse_block_mapping_key)
    return self.parse_block_node_or_indentless_sequence()
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# Note that while the production rules for flow_sequence_entry and
# flow_mapping_entry are equal, their interpretations are different.
# For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
# generates an inline mapping (set syntax).
def parse_flow_sequence_first_entry(self):
    """Consume the token that opens a flow sequence, remember its start
    mark, and parse the first entry."""
    opening = self.get_token()
    self.marks.append(opening.start_mark)
    return self.parse_flow_sequence_entry(first=True)
def parse_flow_sequence_entry(self, first=False):
    """Parse the next entry of a flow sequence, or the end of the sequence.

    Every entry except the first must be preceded by a flow entry token.
    An entry that starts with a key token opens a single-pair mapping
    (MappingStartEvent); any other entry is parsed as a flow node. At the
    closing token a SequenceEndEvent is returned and the parser returns to
    the saved state.

    Raises:
        ParserError: if a separator is missing between two entries.
    """
    if not self.check_token(FlowSequenceEndToken):
        if not first:
            if not self.check_token(FlowEntryToken):
                unexpected = self.peek_token()
                raise ParserError("while parsing a flow sequence", self.marks[-1],
                        "expected ',' or ']', but got %r" % unexpected.id, unexpected.start_mark)
            self.get_token()
        if self.check_token(KeyToken):
            # The key token is only peeked at; the next state consumes it.
            key = self.peek_token()
            pair_start = MappingStartEvent(None, None, True,
                    key.start_mark, key.end_mark,
                    flow_style=True)
            self.state = self.parse_flow_sequence_entry_mapping_key
            return pair_start
        if not self.check_token(FlowSequenceEndToken):
            self.states.append(self.parse_flow_sequence_entry)
            return self.parse_flow_node()
    closing = self.get_token()
    seq_end = SequenceEndEvent(closing.start_mark, closing.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return seq_end
def parse_flow_sequence_entry_mapping_key(self):
    """Parse the key of a single-pair mapping inside a flow sequence.

    The pending key token is consumed first. A key without content produces
    an empty scalar. The value is parsed next in both cases.
    """
    key = self.get_token()
    if self.check_token(ValueToken,
            FlowEntryToken, FlowSequenceEndToken):
        self.state = self.parse_flow_sequence_entry_mapping_value
        return self.process_empty_scalar(key.end_mark)
    self.states.append(self.parse_flow_sequence_entry_mapping_value)
    return self.parse_flow_node()
def parse_flow_sequence_entry_mapping_value(self):
    """Parse the value of a single-pair mapping inside a flow sequence.

    A missing value token, or a value token without content, produces an
    empty scalar. The mapping is closed next in every case.
    """
    if not self.check_token(ValueToken):
        # No value indicator at all: empty value at the next token.
        self.state = self.parse_flow_sequence_entry_mapping_end
        following = self.peek_token()
        return self.process_empty_scalar(following.start_mark)
    indicator = self.get_token()
    if self.check_token(FlowEntryToken, FlowSequenceEndToken):
        # Value indicator without content.
        self.state = self.parse_flow_sequence_entry_mapping_end
        return self.process_empty_scalar(indicator.end_mark)
    self.states.append(self.parse_flow_sequence_entry_mapping_end)
    return self.parse_flow_node()
def parse_flow_sequence_entry_mapping_end(self):
    """Close a single-pair mapping inside a flow sequence.

    The MappingEndEvent is zero-width at the next token, which is not
    consumed; parsing continues with the next sequence entry.
    """
    self.state = self.parse_flow_sequence_entry
    here = self.peek_token().start_mark
    return MappingEndEvent(here, here)
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
def parse_flow_mapping_first_key(self):
    """Consume the token that opens a flow mapping, remember its start
    mark, and parse the first key."""
    opening = self.get_token()
    self.marks.append(opening.start_mark)
    return self.parse_flow_mapping_key(first=True)
def parse_flow_mapping_key(self, first=False):
    """Parse the next key of a flow mapping, or the end of the mapping.

    Every entry except the first must be preceded by a flow entry token.
    After a key token the key node is parsed (or an empty scalar produced
    for a key without content) and the value follows. An entry without a
    key token is parsed as a flow node and gets an empty value. At the
    closing token a MappingEndEvent is returned and the parser returns to
    the saved state.

    Raises:
        ParserError: if a separator is missing between two entries.
    """
    if not self.check_token(FlowMappingEndToken):
        if not first:
            if not self.check_token(FlowEntryToken):
                unexpected = self.peek_token()
                raise ParserError("while parsing a flow mapping", self.marks[-1],
                        "expected ',' or '}', but got %r" % unexpected.id, unexpected.start_mark)
            self.get_token()
        if self.check_token(KeyToken):
            key = self.get_token()
            if self.check_token(ValueToken,
                    FlowEntryToken, FlowMappingEndToken):
                # Key indicator without content.
                self.state = self.parse_flow_mapping_value
                return self.process_empty_scalar(key.end_mark)
            self.states.append(self.parse_flow_mapping_value)
            return self.parse_flow_node()
        if not self.check_token(FlowMappingEndToken):
            self.states.append(self.parse_flow_mapping_empty_value)
            return self.parse_flow_node()
    closing = self.get_token()
    map_end = MappingEndEvent(closing.start_mark, closing.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return map_end
def parse_flow_mapping_value(self):
    """Parse the value belonging to the key just parsed in a flow mapping.

    A missing value token, or a value token without content, produces an
    empty scalar. The next key is parsed afterwards in every case.
    """
    if not self.check_token(ValueToken):
        # No value indicator at all: empty value at the next token.
        self.state = self.parse_flow_mapping_key
        following = self.peek_token()
        return self.process_empty_scalar(following.start_mark)
    indicator = self.get_token()
    if self.check_token(FlowEntryToken, FlowMappingEndToken):
        # Value indicator without content.
        self.state = self.parse_flow_mapping_key
        return self.process_empty_scalar(indicator.end_mark)
    self.states.append(self.parse_flow_mapping_key)
    return self.parse_flow_node()
def parse_flow_mapping_empty_value(self):
    """Produce the empty value for a flow mapping entry that had no key
    token, then continue with the next key."""
    self.state = self.parse_flow_mapping_key
    here = self.peek_token().start_mark
    return self.process_empty_scalar(here)
def process_empty_scalar(self, mark):
    """Return a zero-width ScalarEvent at ``mark`` with an empty value, no
    anchor, no tag and the implicit pair ``(True, False)``."""
    return ScalarEvent(anchor=None, tag=None, implicit=(True, False),
            value='', start_mark=mark, end_mark=mark)
================================================
FILE: metaflow/_vendor/yaml/reader.py
================================================
# This module contains abstractions for the input stream. You don't have to
# look any further; there is no pretty code here.
#
# We define two classes here.
#
# Mark(source, line, column)
# It's just a record and its only use is producing nice error messages.
# Parser does not use it for any other purposes.
#
# Reader(source, data)
# Reader determines the encoding of `data` and converts it to unicode.
# Reader provides the following methods and attributes:
# reader.peek(length=1) - return the next `length` characters
# reader.forward(length=1) - move the current position to `length` characters.
# reader.index - the number of the current character.
# reader.line, reader.column - the line and the column of the current character.
__all__ = ['Reader', 'ReaderError']
from .error import YAMLError, Mark
import codecs, re
class ReaderError(YAMLError):
    """Error about input that cannot be read.

    ``character`` is either a ``bytes`` object (a byte the codec could not
    decode) or a code point (an unacceptable character); ``str()`` chooses
    its wording accordingly.
    """

    def __init__(self, name, position, character, encoding, reason):
        self.name = name
        self.character = character
        self.position = position
        self.encoding = encoding
        self.reason = reason

    def __str__(self):
        if not isinstance(self.character, bytes):
            return "unacceptable character #x%04x: %s\n" \
                    " in \"%s\", position %d" \
                    % (self.character, self.reason,
                            self.name, self.position)
        return "'%s' codec can't decode byte #x%02x: %s\n" \
                " in \"%s\", position %d" \
                % (self.encoding, ord(self.character), self.reason,
                        self.name, self.position)
class Reader(object):
    """Turn a YAML input source into a buffer of characters.

    Reader:
      - determines the data encoding and converts it to a unicode string,
      - checks if characters are in allowed range,
      - adds a NUL character to the end.

    Reader accepts
      - a `bytes` object,
      - a `str` object,
      - a file-like object with its `read` method returning `bytes`,
      - a file-like object with its `read` method returning `str`.

    Yeah, it's ugly and slow.
    """

    def __init__(self, stream):
        """Set up the buffers for `stream` and load the first character."""
        self.name = None
        self.stream = None
        self.stream_pointer = 0   # number of raw items read from `stream`
        self.eof = True
        self.buffer = ''          # decoded characters not yet discarded
        self.pointer = 0          # position of the current character in `buffer`
        self.raw_buffer = None    # data read from the stream but not yet decoded
        self.raw_decode = None    # codec function, or None if data is already str
        self.encoding = None
        self.index = 0
        self.line = 0
        self.column = 0
        if isinstance(stream, str):
            # Text is validated once and used directly; no raw buffer needed.
            self.name = ""
            self.check_printable(stream)
            self.buffer = stream+'\0'
        elif isinstance(stream, bytes):
            self.name = ""
            self.raw_buffer = stream
            self.determine_encoding()
        else:
            # Anything else is treated as a file-like object read lazily.
            self.stream = stream
            self.name = getattr(stream, 'name', "")
            self.eof = False
            self.raw_buffer = None
            self.determine_encoding()

    def peek(self, index=0):
        """Return the character `index` positions past the current one."""
        try:
            return self.buffer[self.pointer+index]
        except IndexError:
            # Not buffered yet: pull in more data and retry.
            self.update(index+1)
            return self.buffer[self.pointer+index]

    def prefix(self, length=1):
        """Return up to `length` characters starting at the current position."""
        if self.pointer+length >= len(self.buffer):
            self.update(length)
        return self.buffer[self.pointer:self.pointer+length]

    def forward(self, length=1):
        """Advance by `length` characters, updating index, line and column."""
        if self.pointer+length+1 >= len(self.buffer):
            # One extra character is needed to look past a '\r'.
            self.update(length+1)
        while length:
            ch = self.buffer[self.pointer]
            self.pointer += 1
            self.index += 1
            if ch in '\n\x85\u2028\u2029' \
                    or (ch == '\r' and self.buffer[self.pointer] != '\n'):
                # A line break; the '\r' of a '\r\n' pair is not counted here.
                self.line += 1
                self.column = 0
            elif ch != '\uFEFF':
                # A byte order mark does not occupy a column.
                self.column += 1
            length -= 1

    def get_mark(self):
        """Return a Mark for the current position.

        The buffer and pointer are attached only when the input is not a
        file-like stream.
        """
        if self.stream is None:
            return Mark(self.name, self.index, self.line, self.column,
                        self.buffer, self.pointer)
        else:
            return Mark(self.name, self.index, self.line, self.column,
                        None, None)

    def determine_encoding(self):
        """Pick the decoder from a UTF-16 BOM (default UTF-8) and prime the buffer."""
        while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
            self.update_raw()
        if isinstance(self.raw_buffer, bytes):
            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
                self.raw_decode = codecs.utf_16_le_decode
                self.encoding = 'utf-16-le'
            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
                self.raw_decode = codecs.utf_16_be_decode
                self.encoding = 'utf-16-be'
            else:
                self.raw_decode = codecs.utf_8_decode
                self.encoding = 'utf-8'
        self.update(1)

    # Matches any single character outside the ranges accepted by the reader.
    NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]')

    def check_printable(self, data):
        """Raise ReaderError if `data` contains a character matching NON_PRINTABLE."""
        match = self.NON_PRINTABLE.search(data)
        if match:
            character = match.group()
            position = self.index+(len(self.buffer)-self.pointer)+match.start()
            raise ReaderError(self.name, position, ord(character),
                              'unicode', "special characters are not allowed")

    def update(self, length):
        """Ensure at least `length` characters are buffered from the pointer on.

        Consumed characters are dropped from the buffer. At end of input a
        NUL character is appended and the raw buffer is released.

        Raises ReaderError when the raw data cannot be decoded or contains a
        character that is not allowed.
        """
        if self.raw_buffer is None:
            return
        self.buffer = self.buffer[self.pointer:]
        self.pointer = 0
        while len(self.buffer) < length:
            if not self.eof:
                self.update_raw()
            if self.raw_decode is not None:
                try:
                    data, converted = self.raw_decode(self.raw_buffer,
                                                      'strict', self.eof)
                except UnicodeDecodeError as exc:
                    # Slice rather than index: indexing `bytes` gives an int,
                    # which ReaderError.__str__ would report as an
                    # "unacceptable character" instead of a decoding failure.
                    character = self.raw_buffer[exc.start:exc.start+1]
                    if self.stream is not None:
                        position = self.stream_pointer-len(self.raw_buffer)+exc.start
                    else:
                        position = exc.start
                    raise ReaderError(self.name, position, character,
                                      exc.encoding, exc.reason)
            else:
                # The stream already yields text; nothing to decode.
                data = self.raw_buffer
                converted = len(data)
            self.check_printable(data)
            self.buffer += data
            self.raw_buffer = self.raw_buffer[converted:]
            if self.eof:
                self.buffer += '\0'
                self.raw_buffer = None
                break

    def update_raw(self, size=4096):
        """Read up to `size` more items from the stream into the raw buffer.

        An empty read marks the end of the input.
        """
        data = self.stream.read(size)
        if self.raw_buffer is None:
            self.raw_buffer = data
        else:
            self.raw_buffer += data
        self.stream_pointer += len(data)
        if not data:
            self.eof = True
================================================
FILE: metaflow/_vendor/yaml/representer.py
================================================
__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer',
'RepresenterError']
from .error import *
from .nodes import *
import datetime, copyreg, types, base64, collections
class RepresenterError(YAMLError):
    """Raised by the representers when an object cannot be represented."""
class BaseRepresenter:
    """Core machinery that turns Python objects into YAML nodes.

    Handlers are looked up in two class-level registries:
    `yaml_representers` (exact type match) and `yaml_multi_representers`
    (match against any class in the object's MRO). A ``None`` key in either
    registry acts as the fallback handler.
    """

    yaml_representers = {}
    yaml_multi_representers = {}

    def __init__(self, default_style=None, default_flow_style=False, sort_keys=True):
        self.default_style = default_style
        self.sort_keys = sort_keys
        self.default_flow_style = default_flow_style
        # id(obj) -> node already built for that object.
        self.represented_objects = {}
        # Objects seen so far are kept referenced here.
        self.object_keeper = []
        self.alias_key = None

    def represent(self, data):
        """Represent `data`, pass the node to `self.serialize`, then reset state."""
        root = self.represent_data(data)
        self.serialize(root)
        self.represented_objects = {}
        self.object_keeper = []
        self.alias_key = None

    def represent_data(self, data):
        """Return the node for `data`, reusing the node of an already seen object."""
        self.alias_key = None if self.ignore_aliases(data) else id(data)
        if self.alias_key is not None:
            if self.alias_key in self.represented_objects:
                return self.represented_objects[self.alias_key]
            self.object_keeper.append(data)
        mro = type(data).__mro__
        exact_type = mro[0]
        # 1. A handler registered for exactly this type.
        if exact_type in self.yaml_representers:
            return self.yaml_representers[exact_type](self, data)
        # 2. The first class in the MRO that has a multi-representer.
        for klass in mro:
            if klass in self.yaml_multi_representers:
                return self.yaml_multi_representers[klass](self, data)
        # 3. The fallbacks registered under None, else a plain string scalar.
        if None in self.yaml_multi_representers:
            return self.yaml_multi_representers[None](self, data)
        if None in self.yaml_representers:
            return self.yaml_representers[None](self, data)
        return ScalarNode(None, str(data))

    @classmethod
    def add_representer(cls, data_type, representer):
        """Register `representer` for objects whose type is exactly `data_type`."""
        # Give this class its own registry the first time it is modified so
        # that the registry it inherited is left untouched.
        if 'yaml_representers' not in cls.__dict__:
            cls.yaml_representers = cls.yaml_representers.copy()
        cls.yaml_representers[data_type] = representer

    @classmethod
    def add_multi_representer(cls, data_type, representer):
        """Register `representer` for `data_type` and classes deriving from it."""
        if 'yaml_multi_representers' not in cls.__dict__:
            cls.yaml_multi_representers = cls.yaml_multi_representers.copy()
        cls.yaml_multi_representers[data_type] = representer

    def represent_scalar(self, tag, value, style=None):
        """Build a ScalarNode, defaulting `style` to `self.default_style`."""
        chosen_style = self.default_style if style is None else style
        node = ScalarNode(tag, value, style=chosen_style)
        if self.alias_key is not None:
            self.represented_objects[self.alias_key] = node
        return node

    def represent_sequence(self, tag, sequence, flow_style=None):
        """Build a SequenceNode whose children represent the items of `sequence`."""
        children = []
        node = SequenceNode(tag, children, flow_style=flow_style)
        # Record the node before descending so nested references to the same
        # object find it.
        if self.alias_key is not None:
            self.represented_objects[self.alias_key] = node
        all_plain = True
        for element in sequence:
            child = self.represent_data(element)
            if not isinstance(child, ScalarNode) or child.style:
                all_plain = False
            children.append(child)
        if flow_style is None:
            if self.default_flow_style is None:
                # No explicit preference: flow style only for plain scalars.
                node.flow_style = all_plain
            else:
                node.flow_style = self.default_flow_style
        return node

    def represent_mapping(self, tag, mapping, flow_style=None):
        """Build a MappingNode from a mapping or an iterable of key/value pairs."""
        pairs = []
        node = MappingNode(tag, pairs, flow_style=flow_style)
        if self.alias_key is not None:
            self.represented_objects[self.alias_key] = node
        all_plain = True
        if hasattr(mapping, 'items'):
            mapping = list(mapping.items())
            if self.sort_keys:
                try:
                    mapping = sorted(mapping)
                except TypeError:
                    # Items that cannot be ordered keep their original order.
                    pass
        for raw_key, raw_value in mapping:
            key_node = self.represent_data(raw_key)
            value_node = self.represent_data(raw_value)
            if not isinstance(key_node, ScalarNode) or key_node.style:
                all_plain = False
            if not isinstance(value_node, ScalarNode) or value_node.style:
                all_plain = False
            pairs.append((key_node, value_node))
        if flow_style is None:
            if self.default_flow_style is None:
                node.flow_style = all_plain
            else:
                node.flow_style = self.default_flow_style
        return node

    def ignore_aliases(self, data):
        """Tell whether `data` is excluded from alias tracking; never, here."""
        return False
class SafeRepresenter(BaseRepresenter):
    """Representer for basic Python values using the standard YAML tags."""

    def ignore_aliases(self, data):
        """Return True for values that are never tracked for aliasing.

        These are ``None``, the empty tuple, and str/bytes/bool/int/float
        instances. For anything else the method falls through and returns
        ``None``.
        """
        if data is None:
            return True
        if isinstance(data, tuple) and data == ():
            return True
        if isinstance(data, (str, bytes, bool, int, float)):
            return True

    def represent_none(self, data):
        return self.represent_scalar('tag:yaml.org,2002:null', 'null')

    def represent_str(self, data):
        return self.represent_scalar('tag:yaml.org,2002:str', data)

    def represent_binary(self, data):
        """Represent bytes as a base64 encoded literal block scalar."""
        if hasattr(base64, 'encodebytes'):
            data = base64.encodebytes(data).decode('ascii')
        else:
            # Fallback for interpreters that only provide the older name.
            data = base64.encodestring(data).decode('ascii')
        return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|')

    def represent_bool(self, data):
        if data:
            value = 'true'
        else:
            value = 'false'
        return self.represent_scalar('tag:yaml.org,2002:bool', value)

    def represent_int(self, data):
        return self.represent_scalar('tag:yaml.org,2002:int', str(data))

    # Square a large float until its repr stops changing; the result is the
    # value compared against to recognise infinities in represent_float.
    inf_value = 1e300
    while repr(inf_value) != repr(inf_value*inf_value):
        inf_value *= inf_value

    def represent_float(self, data):
        """Represent a float, using '.nan', '.inf' and '-.inf' for special values."""
        if data != data or (data == 0.0 and data == 1.0):
            value = '.nan'
        elif data == self.inf_value:
            value = '.inf'
        elif data == -self.inf_value:
            value = '-.inf'
        else:
            value = repr(data).lower()
            # Note that in some cases `repr(data)` represents a float number
            # without the decimal parts.  For instance:
            #   >>> repr(1e17)
            #   '1e17'
            # Unfortunately, this is not a valid float representation according
            # to the definition of the `!!float` tag.  We fix this by adding
            # '.0' before the 'e' symbol.
            if '.' not in value and 'e' in value:
                value = value.replace('e', '.0e', 1)
        return self.represent_scalar('tag:yaml.org,2002:float', value)

    def represent_list(self, data):
        return self.represent_sequence('tag:yaml.org,2002:seq', data)

    def represent_dict(self, data):
        return self.represent_mapping('tag:yaml.org,2002:map', data)

    def represent_set(self, data):
        """Represent a set as a mapping from each member to ``None``."""
        value = {}
        for key in data:
            value[key] = None
        return self.represent_mapping('tag:yaml.org,2002:set', value)

    def represent_date(self, data):
        value = data.isoformat()
        return self.represent_scalar('tag:yaml.org,2002:timestamp', value)

    def represent_datetime(self, data):
        # A space, not 'T', separates the date and time parts.
        value = data.isoformat(' ')
        return self.represent_scalar('tag:yaml.org,2002:timestamp', value)

    def represent_yaml_object(self, tag, data, cls, flow_style=None):
        """Represent `data` as a mapping of its state under `tag`.

        The state comes from ``data.__getstate__()`` when that method exists,
        otherwise from a copy of ``data.__dict__``. `cls` is accepted for
        interface compatibility and is not used here.
        """
        if hasattr(data, '__getstate__'):
            state = data.__getstate__()
        else:
            state = data.__dict__.copy()
        if state is None:
            # `__getstate__` may return None (for example the default
            # implementation on Python 3.11+ for an instance without state);
            # iterating None in represent_mapping would raise TypeError, so
            # emit an empty mapping instead.
            state = {}
        return self.represent_mapping(tag, state, flow_style=flow_style)

    def represent_undefined(self, data):
        """Fallback handler: refuse objects that have no registered representer."""
        raise RepresenterError("cannot represent an object", data)
# Register the SafeRepresenter handlers for the built-in types. The final
# entry, keyed by None, is the fallback for otherwise unhandled objects.
for _data_type, _handler in (
        (type(None), SafeRepresenter.represent_none),
        (str, SafeRepresenter.represent_str),
        (bytes, SafeRepresenter.represent_binary),
        (bool, SafeRepresenter.represent_bool),
        (int, SafeRepresenter.represent_int),
        (float, SafeRepresenter.represent_float),
        (list, SafeRepresenter.represent_list),
        (tuple, SafeRepresenter.represent_list),
        (dict, SafeRepresenter.represent_dict),
        (set, SafeRepresenter.represent_set),
        (datetime.date, SafeRepresenter.represent_date),
        (datetime.datetime, SafeRepresenter.represent_datetime),
        (None, SafeRepresenter.represent_undefined),
):
    SafeRepresenter.add_representer(_data_type, _handler)
# Do not leave the loop variables behind in the module namespace.
del _data_type, _handler
class Representer(SafeRepresenter):
    """Representer that also handles Python-specific objects via python/* tags."""

    def represent_complex(self, data):
        """Represent a complex number, omitting a zero real or imaginary part."""
        real, imag = data.real, data.imag
        if imag == 0.0:
            text = '%r' % real
        elif real == 0.0:
            text = '%rj' % imag
        elif imag > 0:
            text = '%r+%rj' % (real, imag)
        else:
            # The sign is already part of the imaginary part's repr.
            text = '%r%rj' % (real, imag)
        return self.represent_scalar('tag:yaml.org,2002:python/complex', text)

    def represent_tuple(self, data):
        return self.represent_sequence('tag:yaml.org,2002:python/tuple', data)

    def represent_name(self, data):
        """Represent a named object by its dotted ``module.name`` path."""
        dotted = '%s.%s' % (data.__module__, data.__name__)
        return self.represent_scalar('tag:yaml.org,2002:python/name:'+dotted, '')

    def represent_module(self, data):
        return self.represent_scalar(
            'tag:yaml.org,2002:python/module:'+data.__name__, '')

    def represent_object(self, data):
        """Represent an arbitrary object through its reduce protocol.

        The reduce value is a tuple of length 2-5:
            (function, args, state, listitems, dictitems)
        Reconstruction calls function(*args) and then applies state,
        listitems and dictitems when they are not None. When
        function.__name__ is '__newobj__' the object is instead created with
        args[0].__new__(*args). A reduce value that is a string is not
        supported.

        The result is a !!python/object, !!python/object/new or
        !!python/object/apply node.
        """
        cls = type(data)
        if cls in copyreg.dispatch_table:
            reduced = copyreg.dispatch_table[cls](data)
        elif hasattr(data, '__reduce_ex__'):
            reduced = data.__reduce_ex__(2)
        elif hasattr(data, '__reduce__'):
            reduced = data.__reduce__()
        else:
            raise RepresenterError("cannot represent an object", data)
        # Pad with None so the tuple always unpacks into five fields.
        reduced = (list(reduced)+[None]*5)[:5]
        function, args, state, listitems, dictitems = reduced
        args = list(args)
        if state is None:
            state = {}
        if listitems is not None:
            listitems = list(listitems)
        if dictitems is not None:
            dictitems = dict(dictitems)
        newobj = function.__name__ == '__newobj__'
        if newobj:
            # The class to instantiate is carried as the first argument.
            function = args[0]
            args = args[1:]
            tag = 'tag:yaml.org,2002:python/object/new:'
        else:
            tag = 'tag:yaml.org,2002:python/object/apply:'
        function_name = '%s.%s' % (function.__module__, function.__name__)
        no_items = not listitems and not dictitems
        # Simplest form: only a dict state on a __newobj__ object.
        if not args and no_items \
                and isinstance(state, dict) and newobj:
            return self.represent_mapping(
                'tag:yaml.org,2002:python/object:'+function_name, state)
        # Only positional arguments: a sequence is enough.
        if no_items \
                and isinstance(state, dict) and not state:
            return self.represent_sequence(tag+function_name, args)
        # General form: a mapping holding whichever parts are present.
        value = {}
        if args:
            value['args'] = args
        if state or not isinstance(state, dict):
            value['state'] = state
        if listitems:
            value['listitems'] = listitems
        if dictitems:
            value['dictitems'] = dictitems
        return self.represent_mapping(tag+function_name, value)

    def represent_ordered_dict(self, data):
        """Represent an ordered dict as an apply node over a list of pairs."""
        # Provide uniform representation across different Python versions.
        kind = type(data)
        tag = 'tag:yaml.org,2002:python/object/apply:%s.%s' \
            % (kind.__module__, kind.__name__)
        pairs = [[key, value] for key, value in data.items()]
        return self.represent_sequence(tag, [pairs])
# Register the Representer handlers for Python-specific types.
for _data_type, _handler in (
        (complex, Representer.represent_complex),
        (tuple, Representer.represent_tuple),
        (type, Representer.represent_name),
        (collections.OrderedDict, Representer.represent_ordered_dict),
        (types.FunctionType, Representer.represent_name),
        (types.BuiltinFunctionType, Representer.represent_name),
        (types.ModuleType, Representer.represent_module),
):
    Representer.add_representer(_data_type, _handler)
# Do not leave the loop variables behind in the module namespace.
del _data_type, _handler

# Every other object goes through the generic reduce-based representation.
Representer.add_multi_representer(object,
                                  Representer.represent_object)
================================================
FILE: metaflow/_vendor/yaml/resolver.py
================================================
__all__ = ['BaseResolver', 'Resolver']
from .error import *
from .nodes import *
import re
class ResolverError(YAMLError):
    """Raised when an invalid path resolver is registered."""
class BaseResolver:
    """Determine the tag of a node from its kind, value and position.

    Two class-level registries drive the resolution:
    `yaml_implicit_resolvers` maps a first character (or ``None`` for "any")
    to a list of ``(tag, regexp)`` pairs tried against plain scalars, and
    `yaml_path_resolvers` maps ``(path, kind)`` to a tag.
    """

    DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str'
    DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq'
    DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map'

    yaml_implicit_resolvers = {}
    yaml_path_resolvers = {}

    def __init__(self):
        # Stacks maintained by descend_resolver / ascend_resolver.
        self.resolver_exact_paths = []
        self.resolver_prefix_paths = []

    @classmethod
    def add_implicit_resolver(cls, tag, regexp, first):
        """Register `regexp` as an implicit resolver producing `tag`.

        `first` is an iterable of the first characters for which the pattern
        should be tried; ``None`` registers it under the ``None`` key, which
        `resolve` consults for every scalar.
        """
        if 'yaml_implicit_resolvers' not in cls.__dict__:
            # Copy the inherited registry, including its lists, so that the
            # parent class is not modified.
            implicit_resolvers = {}
            for key in cls.yaml_implicit_resolvers:
                implicit_resolvers[key] = cls.yaml_implicit_resolvers[key][:]
            cls.yaml_implicit_resolvers = implicit_resolvers
        if first is None:
            first = [None]
        for ch in first:
            cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))

    @classmethod
    def add_path_resolver(cls, tag, path, kind=None):
        """Register `tag` for nodes of `kind` found at `path`.

        Raises ResolverError for a malformed path element, node checker,
        index checker or node kind.
        """
        # Note: `add_path_resolver` is experimental.  The API could be changed.
        # `new_path` is a pattern that is matched against the path from the
        # root to the node that is being considered.  `node_path` elements are
        # tuples `(node_check, index_check)`.  `node_check` is a node class:
        # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`.  `None`
        # matches any kind of a node.  `index_check` could be `None`, a boolean
        # value, a string value, or a number.  `None` and `False` match against
        # any _value_ of sequence and mapping nodes.  `True` matches against
        # any _key_ of a mapping node.  A string `index_check` matches against
        # a mapping value that corresponds to a scalar key which content is
        # equal to the `index_check` value.  An integer `index_check` matches
        # against a sequence value with the index equal to `index_check`.
        if 'yaml_path_resolvers' not in cls.__dict__:
            cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy()
        new_path = []
        for element in path:
            if isinstance(element, (list, tuple)):
                if len(element) == 2:
                    node_check, index_check = element
                elif len(element) == 1:
                    node_check = element[0]
                    index_check = True
                else:
                    # The value is wrapped in a 1-tuple: a bare tuple operand
                    # would be unpacked by `%` and raise TypeError.
                    raise ResolverError("Invalid path element: %s" % (element,))
            else:
                node_check = None
                index_check = element
            if node_check is str:
                node_check = ScalarNode
            elif node_check is list:
                node_check = SequenceNode
            elif node_check is dict:
                node_check = MappingNode
            elif node_check not in [ScalarNode, SequenceNode, MappingNode] \
                    and not isinstance(node_check, str) \
                    and node_check is not None:
                raise ResolverError("Invalid node checker: %s" % (node_check,))
            if not isinstance(index_check, (str, int)) \
                    and index_check is not None:
                raise ResolverError("Invalid index checker: %s" % (index_check,))
            new_path.append((node_check, index_check))
        if kind is str:
            kind = ScalarNode
        elif kind is list:
            kind = SequenceNode
        elif kind is dict:
            kind = MappingNode
        elif kind not in [ScalarNode, SequenceNode, MappingNode] \
                and kind is not None:
            raise ResolverError("Invalid node kind: %s" % (kind,))
        cls.yaml_path_resolvers[tuple(new_path), kind] = tag

    def descend_resolver(self, current_node, current_index):
        """Push the path-resolver state for a node one level deeper.

        Does nothing when no path resolvers are registered.
        """
        if not self.yaml_path_resolvers:
            return
        exact_paths = {}
        prefix_paths = []
        if current_node:
            depth = len(self.resolver_prefix_paths)
            # Keep only the candidate paths that still match at this depth.
            for path, kind in self.resolver_prefix_paths[-1]:
                if self.check_resolver_prefix(depth, path, kind,
                                              current_node, current_index):
                    if len(path) > depth:
                        prefix_paths.append((path, kind))
                    else:
                        exact_paths[kind] = self.yaml_path_resolvers[path, kind]
        else:
            # No current node: every registered path is a candidate, and
            # empty paths match immediately.
            for path, kind in self.yaml_path_resolvers:
                if not path:
                    exact_paths[kind] = self.yaml_path_resolvers[path, kind]
                else:
                    prefix_paths.append((path, kind))
        self.resolver_exact_paths.append(exact_paths)
        self.resolver_prefix_paths.append(prefix_paths)

    def ascend_resolver(self):
        """Pop the state pushed by the matching `descend_resolver` call."""
        if not self.yaml_path_resolvers:
            return
        self.resolver_exact_paths.pop()
        self.resolver_prefix_paths.pop()

    def check_resolver_prefix(self, depth, path, kind,
                              current_node, current_index):
        """Return True if `path[depth-1]` matches the node and index, else None."""
        node_check, index_check = path[depth-1]
        if isinstance(node_check, str):
            # A string node check is compared against the node's tag.
            if current_node.tag != node_check:
                return
        elif node_check is not None:
            if not isinstance(current_node, node_check):
                return
        if index_check is True and current_index is not None:
            return
        if (index_check is False or index_check is None) \
                and current_index is None:
            return
        if isinstance(index_check, str):
            if not (isinstance(current_index, ScalarNode)
                    and index_check == current_index.value):
                return
        elif isinstance(index_check, int) and not isinstance(index_check, bool):
            if index_check != current_index:
                return
        return True

    def resolve(self, kind, value, implicit):
        """Return the tag for a node of `kind` with content `value`.

        For scalars with ``implicit[0]`` set, the implicit resolvers
        registered for the value's first character are tried first, then
        those registered under ``None``. Otherwise a matching path resolver
        is used, and finally the default tag for `kind`.
        """
        if kind is ScalarNode and implicit[0]:
            if value == '':
                resolvers = self.yaml_implicit_resolvers.get('', [])
            else:
                resolvers = self.yaml_implicit_resolvers.get(value[0], [])
            wildcard_resolvers = self.yaml_implicit_resolvers.get(None, [])
            # Concatenate into a new list: `resolvers` may be the list stored
            # in the class-level registry, and extending it in place would
            # grow that shared list on every call.
            for tag, regexp in resolvers + wildcard_resolvers:
                if regexp.match(value):
                    return tag
            implicit = implicit[1]
        if self.yaml_path_resolvers:
            exact_paths = self.resolver_exact_paths[-1]
            if kind in exact_paths:
                return exact_paths[kind]
            if None in exact_paths:
                return exact_paths[None]
        if kind is ScalarNode:
            return self.DEFAULT_SCALAR_TAG
        elif kind is SequenceNode:
            return self.DEFAULT_SEQUENCE_TAG
        elif kind is MappingNode:
            return self.DEFAULT_MAPPING_TAG
class Resolver(BaseResolver):
    """BaseResolver subclass that the implicit resolvers below are registered on."""
# Implicit resolvers for plain scalars. Each call registers a tag, the
# pattern a scalar must match to receive it, and the list of first characters
# for which the pattern is tried. For a given first character the patterns
# are tried in registration order, so the order of these calls matters.

# Booleans: yes/no, true/false, on/off in three capitalisations.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:bool',
re.compile(r'''^(?:yes|Yes|YES|no|No|NO
|true|True|TRUE|false|False|FALSE
|on|On|ON|off|Off|OFF)$''', re.X),
list('yYnNtTfFoO'))
# Floats: decimal forms, colon-separated forms with a fraction, infinities
# and NaN.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:float',
re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
|\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
|[-+]?\.(?:inf|Inf|INF)
|\.(?:nan|NaN|NAN))$''', re.X),
list('-+0123456789.'))
# Integers: binary (0b), leading-zero octal, decimal, hexadecimal (0x) and
# colon-separated forms.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:int',
re.compile(r'''^(?:[-+]?0b[0-1_]+
|[-+]?0[0-7_]+
|[-+]?(?:0|[1-9][0-9_]*)
|[-+]?0x[0-9a-fA-F_]+
|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X),
list('-+0123456789'))
# The merge key '<<'.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:merge',
re.compile(r'^(?:<<)$'),
['<'])
# Null: '~', null/Null/NULL, or the empty scalar.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:null',
re.compile(r'''^(?: ~
|null|Null|NULL
| )$''', re.X),
['~', 'n', 'N', ''])
# Timestamps: a plain date, or a date followed by a time with an optional
# fraction and time zone.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:timestamp',
re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
|[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
(?:[Tt]|[ \t]+)[0-9][0-9]?
:[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)?
(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X),
list('0123456789'))
# The value key '='.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:value',
re.compile(r'^(?:=)$'),
['='])
# The following resolver is only for documentation purposes. It cannot work
# because plain scalars cannot start with '!', '&', or '*'.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:yaml',
re.compile(r'^(?:!|&|\*)$'),
list('!&*'))
================================================
FILE: metaflow/_vendor/yaml/scanner.py
================================================
# Scanner produces tokens of the following types:
# STREAM-START
# STREAM-END
# DIRECTIVE(name, value)
# DOCUMENT-START
# DOCUMENT-END
# BLOCK-SEQUENCE-START
# BLOCK-MAPPING-START
# BLOCK-END
# FLOW-SEQUENCE-START
# FLOW-MAPPING-START
# FLOW-SEQUENCE-END
# FLOW-MAPPING-END
# BLOCK-ENTRY
# FLOW-ENTRY
# KEY
# VALUE
# ALIAS(value)
# ANCHOR(value)
# TAG(value)
# SCALAR(value, plain, style)
#
# Read comments in the Scanner code for more details.
#
__all__ = ['Scanner', 'ScannerError']
from .error import MarkedYAMLError
from .tokens import *
class ScannerError(MarkedYAMLError):
    """Raised by the Scanner when the input cannot be split into tokens."""
class SimpleKey:
    """Record of a position where a simple key may start.

    See the simple keys treatment in the Scanner for how it is used.
    """

    def __init__(self, token_number, required, index, line, column, mark):
        self.token_number, self.required = token_number, required
        self.index, self.line, self.column = index, line, column
        self.mark = mark
class Scanner:
def __init__(self):
"""Initialize the scanner."""
# It is assumed that Scanner and Reader will have a common descendant.
# Reader do the dirty work of checking for BOM and converting the
# input data to Unicode. It also adds NUL to the end.
#
# Reader supports the following methods
# self.peek(i=0) # peek the next i-th character
# self.prefix(l=1) # peek the next l characters
# self.forward(l=1) # read the next l characters and move the pointer.
# Had we reached the end of the stream?
self.done = False
# The number of unclosed '{' and '['. `flow_level == 0` means block
# context.
self.flow_level = 0
# List of processed tokens that are not yet emitted.
self.tokens = []
# Add the STREAM-START token.
self.fetch_stream_start()
# Number of tokens that were emitted through the `get_token` method.
self.tokens_taken = 0
# The current indentation level.
self.indent = -1
# Past indentation levels.
self.indents = []
# Variables related to simple keys treatment.
# A simple key is a key that is not denoted by the '?' indicator.
# Example of simple keys:
# ---
# block simple key: value
# ? not a simple key:
# : { flow simple key: value }
# We emit the KEY token before all keys, so when we find a potential
# simple key, we try to locate the corresponding ':' indicator.
# Simple keys should be limited to a single line and 1024 characters.
# Can a simple key start at the current position? A simple key may
# start:
# - at the beginning of the line, not counting indentation spaces
# (in block context),
# - after '{', '[', ',' (in the flow context),
# - after '?', ':', '-' (in the block context).
# In the block context, this flag also signifies if a block collection
# may start at the current position.
self.allow_simple_key = True
# Keep track of possible simple keys. This is a dictionary. The key
# is `flow_level`; there can be no more that one possible simple key
# for each level. The value is a SimpleKey record:
# (token_number, required, index, line, column, mark)
# A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
# '[', or '{' tokens.
self.possible_simple_keys = {}
# Public methods.
def check_token(self, *choices):
# Check if the next token is one of the given types.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
if not choices:
return True
for choice in choices:
if isinstance(self.tokens[0], choice):
return True
return False
def peek_token(self):
# Return the next token, but do not delete if from the queue.
# Return None if no more tokens.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
return self.tokens[0]
else:
return None
def get_token(self):
# Return the next token.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
self.tokens_taken += 1
return self.tokens.pop(0)
# Private methods.
def need_more_tokens(self):
if self.done:
return False
if not self.tokens:
return True
# The current token may be a potential simple key, so we
# need to look further.
self.stale_possible_simple_keys()
if self.next_possible_simple_key() == self.tokens_taken:
return True
def fetch_more_tokens(self):
# Eat whitespaces and comments until we reach the next token.
self.scan_to_next_token()
# Remove obsolete possible simple keys.
self.stale_possible_simple_keys()
# Compare the current indentation and column. It may add some tokens
# and decrease the current indentation level.
self.unwind_indent(self.column)
# Peek the next character.
ch = self.peek()
# Is it the end of stream?
if ch == '\0':
return self.fetch_stream_end()
# Is it a directive?
if ch == '%' and self.check_directive():
return self.fetch_directive()
# Is it the document start?
if ch == '-' and self.check_document_start():
return self.fetch_document_start()
# Is it the document end?
if ch == '.' and self.check_document_end():
return self.fetch_document_end()
# TODO: support for BOM within a stream.
#if ch == '\uFEFF':
# return self.fetch_bom() <-- issue BOMToken
# Note: the order of the following checks is NOT significant.
# Is it the flow sequence start indicator?
if ch == '[':
return self.fetch_flow_sequence_start()
# Is it the flow mapping start indicator?
if ch == '{':
return self.fetch_flow_mapping_start()
# Is it the flow sequence end indicator?
if ch == ']':
return self.fetch_flow_sequence_end()
# Is it the flow mapping end indicator?
if ch == '}':
return self.fetch_flow_mapping_end()
# Is it the flow entry indicator?
if ch == ',':
return self.fetch_flow_entry()
# Is it the block entry indicator?
if ch == '-' and self.check_block_entry():
return self.fetch_block_entry()
# Is it the key indicator?
if ch == '?' and self.check_key():
return self.fetch_key()
# Is it the value indicator?
if ch == ':' and self.check_value():
return self.fetch_value()
# Is it an alias?
if ch == '*':
return self.fetch_alias()
# Is it an anchor?
if ch == '&':
return self.fetch_anchor()
# Is it a tag?
if ch == '!':
return self.fetch_tag()
# Is it a literal scalar?
if ch == '|' and not self.flow_level:
return self.fetch_literal()
# Is it a folded scalar?
if ch == '>' and not self.flow_level:
return self.fetch_folded()
# Is it a single quoted scalar?
if ch == '\'':
return self.fetch_single()
# Is it a double quoted scalar?
if ch == '\"':
return self.fetch_double()
# It must be a plain scalar then.
if self.check_plain():
return self.fetch_plain()
# No? It's an error. Let's produce a nice error message.
raise ScannerError("while scanning for the next token", None,
"found character %r that cannot start any token" % ch,
self.get_mark())
# Simple keys treatment.
def next_possible_simple_key(self):
# Return the number of the nearest possible simple key. Actually we
# don't need to loop through the whole dictionary. We may replace it
# with the following code:
# if not self.possible_simple_keys:
# return None
# return self.possible_simple_keys[
# min(self.possible_simple_keys.keys())].token_number
min_token_number = None
for level in self.possible_simple_keys:
key = self.possible_simple_keys[level]
if min_token_number is None or key.token_number < min_token_number:
min_token_number = key.token_number
return min_token_number
def stale_possible_simple_keys(self):
# Remove entries that are no longer possible simple keys. According to
# the YAML specification, simple keys
# - should be limited to a single line,
# - should be no longer than 1024 characters.
# Disabling this procedure will allow simple keys of any length and
# height (may cause problems if indentation is broken though).
for level in list(self.possible_simple_keys):
key = self.possible_simple_keys[level]
if key.line != self.line \
or self.index-key.index > 1024:
if key.required:
raise ScannerError("while scanning a simple key", key.mark,
"could not find expected ':'", self.get_mark())
del self.possible_simple_keys[level]
def save_possible_simple_key(self):
# The next token may start a simple key. We check if it's possible
# and save its position. This function is called for
# ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
# Check if a simple key is required at the current position.
required = not self.flow_level and self.indent == self.column
# The next token might be a simple key. Let's save it's number and
# position.
if self.allow_simple_key:
self.remove_possible_simple_key()
token_number = self.tokens_taken+len(self.tokens)
key = SimpleKey(token_number, required,
self.index, self.line, self.column, self.get_mark())
self.possible_simple_keys[self.flow_level] = key
def remove_possible_simple_key(self):
# Remove the saved possible key position at the current flow level.
if self.flow_level in self.possible_simple_keys:
key = self.possible_simple_keys[self.flow_level]
if key.required:
raise ScannerError("while scanning a simple key", key.mark,
"could not find expected ':'", self.get_mark())
del self.possible_simple_keys[self.flow_level]
# Indentation functions.
def unwind_indent(self, column):
## In flow context, tokens should respect indentation.
## Actually the condition should be `self.indent >= column` according to
## the spec. But this condition will prohibit intuitively correct
## constructions such as
## key : {
## }
#if self.flow_level and self.indent > column:
# raise ScannerError(None, None,
# "invalid indentation or unclosed '[' or '{'",
# self.get_mark())
# In the flow context, indentation is ignored. We make the scanner less
# restrictive then specification requires.
if self.flow_level:
return
# In block context, we may need to issue the BLOCK-END tokens.
while self.indent > column:
mark = self.get_mark()
self.indent = self.indents.pop()
self.tokens.append(BlockEndToken(mark, mark))
def add_indent(self, column):
# Check if we need to increase indentation.
if self.indent < column:
self.indents.append(self.indent)
self.indent = column
return True
return False
# Fetchers.
def fetch_stream_start(self):
# We always add STREAM-START as the first token and STREAM-END as the
# last token.
# Read the token.
mark = self.get_mark()
# Add STREAM-START.
self.tokens.append(StreamStartToken(mark, mark,
encoding=self.encoding))
def fetch_stream_end(self):
# Set the current indentation to -1.
self.unwind_indent(-1)
# Reset simple keys.
self.remove_possible_simple_key()
self.allow_simple_key = False
self.possible_simple_keys = {}
# Read the token.
mark = self.get_mark()
# Add STREAM-END.
self.tokens.append(StreamEndToken(mark, mark))
# The steam is finished.
self.done = True
def fetch_directive(self):
    """Scan a '%' directive and append the resulting DIRECTIVE token."""
    # Unwinding to column -1 closes every open block collection.
    self.unwind_indent(-1)

    # A directive cannot be, or follow, a simple key.
    self.remove_possible_simple_key()
    self.allow_simple_key = False

    directive = self.scan_directive()
    self.tokens.append(directive)
def fetch_document_start(self):
    """Emit DOCUMENT-START via the shared document-indicator fetcher."""
    token_class = DocumentStartToken
    self.fetch_document_indicator(token_class)
def fetch_document_end(self):
    """Emit DOCUMENT-END via the shared document-indicator fetcher."""
    token_class = DocumentEndToken
    self.fetch_document_indicator(token_class)
def fetch_document_indicator(self, TokenClass):
    """Consume a three-character document indicator and append its token.

    Args:
        TokenClass: token type to build; called as
            ``TokenClass(start_mark, end_mark)``.
    """
    # Unwinding to column -1 closes every open block collection.
    self.unwind_indent(-1)

    # Reset simple keys; a block collection cannot follow '---'.
    self.remove_possible_simple_key()
    self.allow_simple_key = False

    # The indicator spans exactly three characters.
    begin = self.get_mark()
    self.forward(3)
    finish = self.get_mark()
    self.tokens.append(TokenClass(begin, finish))
def fetch_flow_sequence_start(self):
    """Handle '[' by emitting FLOW-SEQUENCE-START."""
    token_class = FlowSequenceStartToken
    self.fetch_flow_collection_start(token_class)
def fetch_flow_mapping_start(self):
    """Handle '{' by emitting FLOW-MAPPING-START."""
    token_class = FlowMappingStartToken
    self.fetch_flow_collection_start(token_class)
def fetch_flow_collection_start(self, TokenClass):
    """Consume '[' or '{', enter one more flow level and append the token.

    Args:
        TokenClass: token type to build; called as
            ``TokenClass(start_mark, end_mark)``.
    """
    # The opening bracket itself may begin a simple key.
    self.save_possible_simple_key()

    # One level deeper into the flow context.
    self.flow_level += 1

    # A simple key may directly follow '[' or '{'.
    self.allow_simple_key = True

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(TokenClass(begin, finish))
def fetch_flow_sequence_end(self):
    """Handle ']' by emitting FLOW-SEQUENCE-END."""
    token_class = FlowSequenceEndToken
    self.fetch_flow_collection_end(token_class)
def fetch_flow_mapping_end(self):
    """Handle '}' by emitting FLOW-MAPPING-END."""
    token_class = FlowMappingEndToken
    self.fetch_flow_collection_end(token_class)
def fetch_flow_collection_end(self, TokenClass):
    """Consume ']' or '}', leave one flow level and append the token.

    Args:
        TokenClass: token type to build; called as
            ``TokenClass(start_mark, end_mark)``.
    """
    # Drop any simple-key candidate of the level being closed.
    self.remove_possible_simple_key()

    # One level out of the flow context.
    self.flow_level -= 1

    # A simple key cannot directly follow ']' or '}'.
    self.allow_simple_key = False

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(TokenClass(begin, finish))
def fetch_flow_entry(self):
    """Consume ',' and append a FLOW-ENTRY token."""
    # A simple key may directly follow ','.
    self.allow_simple_key = True

    # Drop any simple-key candidate on the current level.
    self.remove_possible_simple_key()

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(FlowEntryToken(begin, finish))
def fetch_block_entry(self):
    """Consume '-' and append BLOCK-ENTRY, opening a block sequence if needed.

    Raises:
        ScannerError: in the block context when a new entry is not
            allowed at this position.
    """
    # Only the block context is validated here. A block entry inside the
    # flow context is an error too, but the parser is left to detect it.
    if not self.flow_level:
        # Are we allowed to start a new entry?
        if not self.allow_simple_key:
            raise ScannerError(None, None,
                    "sequence entries are not allowed here",
                    self.get_mark())

        # A deeper column opens a new sequence: BLOCK-SEQUENCE-START.
        if self.add_indent(self.column):
            here = self.get_mark()
            self.tokens.append(BlockSequenceStartToken(here, here))

    # A simple key may directly follow '-'.
    self.allow_simple_key = True

    # Drop any simple-key candidate on the current level.
    self.remove_possible_simple_key()

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(BlockEntryToken(begin, finish))
def fetch_key(self):
    """Consume '?' and append KEY, opening a block mapping if needed.

    Raises:
        ScannerError: in the block context when a key is not allowed at
            this position.
    """
    in_block = not self.flow_level
    if in_block:
        # Are we allowed to start a key (not necessarily a simple one)?
        if not self.allow_simple_key:
            raise ScannerError(None, None,
                    "mapping keys are not allowed here",
                    self.get_mark())

        # A deeper column opens a new mapping: BLOCK-MAPPING-START.
        if self.add_indent(self.column):
            here = self.get_mark()
            self.tokens.append(BlockMappingStartToken(here, here))

    # After '?' a simple key is allowed only in the block context.
    self.allow_simple_key = not self.flow_level

    # Drop any simple-key candidate on the current level.
    self.remove_possible_simple_key()

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(KeyToken(begin, finish))
def fetch_value(self):
    """Consume ':' and append VALUE, resolving a pending simple key first.

    When a simple-key candidate is stored for the current flow level, a
    KEY token (and, in the block context, possibly BLOCK-MAPPING-START)
    is inserted retroactively at the candidate's token position.

    Raises:
        ScannerError: in the block context when no simple key is pending
            and a mapping value is not allowed at this position.
    """
    if self.flow_level in self.possible_simple_keys:
        # The ':' confirms the saved candidate as a simple key.
        key = self.possible_simple_keys[self.flow_level]
        del self.possible_simple_keys[self.flow_level]
        self.tokens.insert(key.token_number-self.tokens_taken,
                KeyToken(key.mark, key.mark))

        # If the key opens a new block mapping, BLOCK-MAPPING-START has
        # to go in front of the KEY token just inserted.
        if not self.flow_level:
            if self.add_indent(key.column):
                self.tokens.insert(key.token_number-self.tokens_taken,
                        BlockMappingStartToken(key.mark, key.mark))

        # There cannot be two simple keys one after another.
        self.allow_simple_key = False

    else:
        # No pending simple key: this ':' belongs to a complex key.
        # The block-context checks would be caught by the parser anyway.
        if not self.flow_level:
            # A complex value may start if and only if a simple key
            # could start here.
            if not self.allow_simple_key:
                raise ScannerError(None, None,
                        "mapping values are not allowed here",
                        self.get_mark())

            # If this value opens a new block mapping, add
            # BLOCK-MAPPING-START; the parser reports it as an error
            # later on.
            if self.add_indent(self.column):
                here = self.get_mark()
                self.tokens.append(BlockMappingStartToken(here, here))

        # After ':' a simple key is allowed only in the block context.
        self.allow_simple_key = not self.flow_level

        # Drop any simple-key candidate on the current level.
        self.remove_possible_simple_key()

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(ValueToken(begin, finish))
def fetch_alias(self):
    """Scan a '*' alias and append the ALIAS token."""
    # An alias may itself be a simple key.
    self.save_possible_simple_key()

    # A simple key cannot directly follow an alias.
    self.allow_simple_key = False

    alias = self.scan_anchor(AliasToken)
    self.tokens.append(alias)
def fetch_anchor(self):
    """Scan an '&' anchor and append the ANCHOR token."""
    # An anchor may begin a simple key.
    self.save_possible_simple_key()

    # A simple key cannot directly follow an anchor.
    self.allow_simple_key = False

    anchor = self.scan_anchor(AnchorToken)
    self.tokens.append(anchor)
def fetch_tag(self):
    """Scan a '!' tag and append the TAG token."""
    # A tag may begin a simple key.
    self.save_possible_simple_key()

    # A simple key cannot directly follow a tag.
    self.allow_simple_key = False

    tag = self.scan_tag()
    self.tokens.append(tag)
def fetch_literal(self):
    """Fetch a literal block scalar (the '|' style)."""
    literal_style = '|'
    self.fetch_block_scalar(style=literal_style)
def fetch_folded(self):
    """Fetch a folded block scalar (the '>' style)."""
    folded_style = '>'
    self.fetch_block_scalar(style=folded_style)
def fetch_block_scalar(self, style):
    """Scan a block scalar of the given style and append the SCALAR token.

    Args:
        style: the block scalar indicator, '|' or '>'.
    """
    # A simple key may follow a block scalar.
    self.allow_simple_key = True

    # Drop any simple-key candidate on the current level.
    self.remove_possible_simple_key()

    scalar = self.scan_block_scalar(style)
    self.tokens.append(scalar)
def fetch_single(self):
    """Fetch a single-quoted flow scalar."""
    single_quote = '\''
    self.fetch_flow_scalar(style=single_quote)
def fetch_double(self):
    """Fetch a double-quoted flow scalar."""
    double_quote = '"'
    self.fetch_flow_scalar(style=double_quote)
def fetch_flow_scalar(self, style):
    """Scan a quoted scalar of the given style and append the SCALAR token.

    Args:
        style: the quote character, a single or a double quote.
    """
    # A quoted scalar may be a simple key.
    self.save_possible_simple_key()

    # A simple key cannot directly follow a flow scalar.
    self.allow_simple_key = False

    scalar = self.scan_flow_scalar(style)
    self.tokens.append(scalar)
def fetch_plain(self):
    """Scan a plain (unquoted) scalar and append the SCALAR token."""
    # A plain scalar may be a simple key.
    self.save_possible_simple_key()

    # No simple key directly after a plain scalar. `scan_plain` may set
    # the flag again if the scan finishes at the beginning of a line.
    self.allow_simple_key = False

    scalar = self.scan_plain()
    self.tokens.append(scalar)
# Checkers.
def check_directive(self):
    """Tell whether the '%' at the current position starts a directive.

    DIRECTIVE: ^ '%' ...
    The '%' indicator itself has already been checked by the caller.

    Returns:
        True at column 0, otherwise None.
    """
    if self.column != 0:
        return None
    return True
def check_document_start(self):
    """Tell whether a document start marker begins at the current position.

    DOCUMENT-START: ^ '---' (' '|'\\n')

    Returns:
        True on a match, otherwise None.
    """
    if self.column != 0:
        return None
    if self.prefix(3) == '---' \
            and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
        return True
    return None
def check_document_end(self):
    """Tell whether a document end marker begins at the current position.

    DOCUMENT-END: ^ '...' (' '|'\\n')

    Returns:
        True on a match, otherwise None.
    """
    if self.column != 0:
        return None
    if self.prefix(3) == '...' \
            and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
        return True
    return None
def check_block_entry(self):
    """Tell whether the '-' at the current position is a block entry.

    BLOCK-ENTRY: '-' (' '|'\\n')
    """
    following = self.peek(1)
    return following in '\0 \t\r\n\x85\u2028\u2029'
def check_key(self):
    """Tell whether the '?' at the current position is a KEY indicator.

    KEY(flow context):  '?'
    KEY(block context): '?' (' '|'\\n')
    """
    if self.flow_level:
        return True
    following = self.peek(1)
    return following in '\0 \t\r\n\x85\u2028\u2029'
def check_value(self):
    """Tell whether the ':' at the current position is a VALUE indicator.

    VALUE(flow context):  ':'
    VALUE(block context): ':' (' '|'\\n')
    """
    if self.flow_level:
        return True
    following = self.peek(1)
    return following in '\0 \t\r\n\x85\u2028\u2029'
def check_plain(self):
    """Tell whether a plain scalar may start at the current position.

    A plain scalar may start with any non-space character except:
        '-', '?', ':', ',', '[', ']', '{', '}',
        '#', '&', '*', '!', '|', '>', '\\'', '\\"',
        '%', '@', '`'.

    It may also start with '-', '?' or ':' when that character is
    followed by a non-space character. The '?' and ':' cases are limited
    to the block context so that the flow context stays space
    independent; '-' is accepted in both contexts.
    """
    first = self.peek()
    if first not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`':
        return True
    if self.peek(1) in '\0 \t\r\n\x85\u2028\u2029':
        return False
    return first == '-' or (not self.flow_level and first in '?:')
# Scanners.
def scan_to_next_token(self):
    """Skip spaces, comments and line breaks up to the next token.

    Each line break consumed in the block context turns
    ``allow_simple_key`` on.

    A byte order mark is stripped only when it is the first character of
    the stream. BOMs inside the stream, which the specification requires
    support for, are not handled yet; any such mark is treated as part of
    the document.
    """
    # TODO: Tab handling rules need to be made more sane. A good rule is
    #   Tabs cannot precede tokens
    #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
    #   KEY(block), VALUE(block), BLOCK-ENTRY
    # so the check would be
    #   if <TAB>:
    #       self.allow_simple_keys = False
    # A check for `allow_simple_keys == True` would also be needed in
    # `unwind_indent` before issuing BLOCK-END, and the scanners for
    # block, flow and plain scalars would need to be modified.
    if self.index == 0 and self.peek() == '\uFEFF':
        self.forward()

    while True:
        # Leading spaces.
        while self.peek() == ' ':
            self.forward()

        # A comment runs to the end of the line.
        if self.peek() == '#':
            while self.peek() not in '\0\r\n\x85\u2028\u2029':
                self.forward()

        # No line break here means the next token has been reached.
        if not self.scan_line_break():
            break
        if not self.flow_level:
            self.allow_simple_key = True
def scan_directive(self):
    """Scan one '%' directive line and return its DirectiveToken.

    The value is a ``(major, minor)`` pair for %YAML, a
    ``(handle, prefix)`` pair for %TAG and None for any other directive,
    whose arguments are skipped. See the specification for details.
    """
    start_mark = self.get_mark()
    self.forward()
    name = self.scan_directive_name(start_mark)
    value = None
    if name == 'YAML':
        value = self.scan_yaml_directive_value(start_mark)
        end_mark = self.get_mark()
    elif name == 'TAG':
        value = self.scan_tag_directive_value(start_mark)
        end_mark = self.get_mark()
    else:
        # Unknown directive: the token ends right after the name and the
        # rest of the line is skipped.
        end_mark = self.get_mark()
        while self.peek() not in '\0\r\n\x85\u2028\u2029':
            self.forward()
    self.scan_directive_ignored_line(start_mark)
    return DirectiveToken(name, value, start_mark, end_mark)
def scan_directive_name(self, start_mark):
    """Scan and return the name of a directive.

    The name is a run of ASCII letters, digits, '-' and '_'.
    See the specification for details.

    Raises:
        ScannerError: if the name is empty, or is not followed by a
            space, a line break or the end of the stream.
    """
    size = 0
    ch = self.peek(size)
    while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
            or ch in '-_':
        size += 1
        ch = self.peek(size)
    if not size:
        raise ScannerError("while scanning a directive", start_mark,
                "expected alphabetic or numeric character, but found %r"
                % ch, self.get_mark())
    name = self.prefix(size)
    self.forward(size)
    ch = self.peek()
    if ch not in '\0 \r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a directive", start_mark,
                "expected alphabetic or numeric character, but found %r"
                % ch, self.get_mark())
    return name
def scan_yaml_directive_value(self, start_mark):
    """Scan the version of a %YAML directive.

    See the specification for details.

    Returns:
        A ``(major, minor)`` tuple of ints.

    Raises:
        ScannerError: if the '.' separator is missing or the version is
            followed by an unexpected character.
    """
    while self.peek() == ' ':
        self.forward()
    major = self.scan_yaml_directive_number(start_mark)
    if self.peek() != '.':
        raise ScannerError("while scanning a directive", start_mark,
                "expected a digit or '.', but found %r" % self.peek(),
                self.get_mark())
    self.forward()
    minor = self.scan_yaml_directive_number(start_mark)
    if self.peek() not in '\0 \r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a directive", start_mark,
                "expected a digit or ' ', but found %r" % self.peek(),
                self.get_mark())
    version = (major, minor)
    return version
def scan_yaml_directive_number(self, start_mark):
    """Scan one decimal component of a %YAML version and return it as int.

    See the specification for details.

    Raises:
        ScannerError: if the current character is not a digit.
    """
    ch = self.peek()
    if not ('0' <= ch <= '9'):
        raise ScannerError("while scanning a directive", start_mark,
                "expected a digit, but found %r" % ch, self.get_mark())
    digits = 0
    while '0' <= self.peek(digits) <= '9':
        digits += 1
    number = int(self.prefix(digits))
    self.forward(digits)
    return number
def scan_tag_directive_value(self, start_mark):
    """Scan the arguments of a %TAG directive.

    See the specification for details.

    Returns:
        A ``(handle, prefix)`` tuple.
    """
    while self.peek() == ' ':
        self.forward()
    handle = self.scan_tag_directive_handle(start_mark)
    while self.peek() == ' ':
        self.forward()
    tag_prefix = self.scan_tag_directive_prefix(start_mark)
    return (handle, tag_prefix)
def scan_tag_directive_handle(self, start_mark):
    """Scan the handle argument of a %TAG directive.

    See the specification for details.

    Raises:
        ScannerError: if the handle is not followed by a space.
    """
    handle = self.scan_tag_handle('directive', start_mark)
    following = self.peek()
    if following != ' ':
        raise ScannerError("while scanning a directive", start_mark,
                "expected ' ', but found %r" % following, self.get_mark())
    return handle
def scan_tag_directive_prefix(self, start_mark):
    """Scan the prefix (URI) argument of a %TAG directive.

    See the specification for details.

    Raises:
        ScannerError: if the prefix is not followed by a space, a line
            break or the end of the stream.
    """
    uri = self.scan_tag_uri('directive', start_mark)
    following = self.peek()
    if following not in '\0 \r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a directive", start_mark,
                "expected ' ', but found %r" % following, self.get_mark())
    return uri
def scan_directive_ignored_line(self, start_mark):
    """Consume the rest of a directive line: spaces, a comment, the break.

    See the specification for details.

    Raises:
        ScannerError: if anything other than a comment or a line break
            follows the directive.
    """
    while self.peek() == ' ':
        self.forward()
    if self.peek() == '#':
        # Skip the comment text up to the line break.
        while self.peek() not in '\0\r\n\x85\u2028\u2029':
            self.forward()
    leftover = self.peek()
    if leftover not in '\0\r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a directive", start_mark,
                "expected a comment or a line break, but found %r"
                    % leftover, self.get_mark())
    self.scan_line_break()
def scan_anchor(self, TokenClass):
    """Scan an anchor ('&name') or alias ('*name') and return its token.

    Args:
        TokenClass: token type to build; called as
            ``TokenClass(value, start_mark, end_mark)``.

    Raises:
        ScannerError: if the name is empty or is followed by an
            unexpected character.
    """
    # The specification does not restrict the characters of anchors and
    # aliases, which can be ambiguous. For instance the document
    #   [ *alias, value ]
    # can be interpreted in two ways, as
    #   [ "value" ]
    # and
    #   [ *alias , "value" ]
    # Names are therefore restricted to ASCII letters and digits (the
    # loop below also accepts '-' and '_').
    start_mark = self.get_mark()
    name = 'alias' if self.peek() == '*' else 'anchor'
    self.forward()
    size = 0
    ch = self.peek(size)
    while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
            or ch in '-_':
        size += 1
        ch = self.peek(size)
    if not size:
        raise ScannerError("while scanning an %s" % name, start_mark,
                "expected alphabetic or numeric character, but found %r"
                % ch, self.get_mark())
    value = self.prefix(size)
    self.forward(size)
    ch = self.peek()
    if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
        raise ScannerError("while scanning an %s" % name, start_mark,
                "expected alphabetic or numeric character, but found %r"
                % ch, self.get_mark())
    end_mark = self.get_mark()
    return TokenClass(value, start_mark, end_mark)
def scan_tag(self):
    """Scan a tag and return a TagToken whose value is (handle, suffix).

    Three forms are recognised: a verbatim tag written between '!<' and
    '>' (handle None), a lone '!' (handle None, suffix '!'), and a
    handle followed by a suffix. See the specification for details.

    Raises:
        ScannerError: if a verbatim tag is not closed by '>' or the tag
            is followed by an unexpected character.
    """
    start_mark = self.get_mark()
    ch = self.peek(1)
    if ch == '<':
        # Verbatim form: skip '!<', read the URI, expect '>'.
        handle = None
        self.forward(2)
        suffix = self.scan_tag_uri('tag', start_mark)
        if self.peek() != '>':
            raise ScannerError("while parsing a tag", start_mark,
                    "expected '>', but found %r" % self.peek(),
                    self.get_mark())
        self.forward()
    elif ch in '\0 \t\r\n\x85\u2028\u2029':
        # A lone '!'.
        handle = None
        suffix = '!'
        self.forward()
    else:
        # Look ahead for a second '!' before the next space or line
        # break: if there is one, the tag begins with a named handle.
        lookahead = 1
        use_handle = False
        while ch not in '\0 \r\n\x85\u2028\u2029':
            if ch == '!':
                use_handle = True
                break
            lookahead += 1
            ch = self.peek(lookahead)
        if use_handle:
            handle = self.scan_tag_handle('tag', start_mark)
        else:
            handle = '!'
            self.forward()
        suffix = self.scan_tag_uri('tag', start_mark)
    ch = self.peek()
    if ch not in '\0 \r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a tag", start_mark,
                "expected ' ', but found %r" % ch, self.get_mark())
    value = (handle, suffix)
    end_mark = self.get_mark()
    return TagToken(value, start_mark, end_mark)
def scan_block_scalar(self, style):
    """Scan a literal ('|') or folded ('>') block scalar.

    See the specification for details.

    Args:
        style: '>' selects folding; any other value is scanned as a
            literal scalar.

    Returns:
        A non-plain ScalarToken carrying the joined text and ``style``.
    """
    folded = (style == '>')

    pieces = []
    start_mark = self.get_mark()

    # Header: indicator, optional chomping/indentation, rest of the line.
    self.forward()
    chomping, increment = self.scan_block_scalar_indicators(start_mark)
    self.scan_block_scalar_ignored_line(start_mark)

    # Work out the content indentation and move to the first
    # non-empty line.
    min_indent = self.indent+1
    if min_indent < 1:
        min_indent = 1
    if increment is None:
        breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
        indent = max(min_indent, max_indent)
    else:
        indent = min_indent+increment-1
        breaks, end_mark = self.scan_block_scalar_breaks(indent)
    line_break = ''

    # Body: one content line per iteration.
    while self.column == indent and self.peek() != '\0':
        pieces.extend(breaks)
        leading_non_space = self.peek() not in ' \t'
        size = 0
        while self.peek(size) not in '\0\r\n\x85\u2028\u2029':
            size += 1
        pieces.append(self.prefix(size))
        self.forward(size)
        line_break = self.scan_line_break()
        breaks, end_mark = self.scan_block_scalar_breaks(indent)
        if not (self.column == indent and self.peek() != '\0'):
            break

        # Unfortunately, folding rules are ambiguous.
        #
        # This is the folding according to the specification:
        if folded and line_break == '\n' \
                and leading_non_space and self.peek() not in ' \t':
            if not breaks:
                pieces.append(' ')
        else:
            pieces.append(line_break)

        # This is Clark Evans's interpretation (also in the spec
        # examples):
        #
        #if folded and line_break == '\n':
        #    if not breaks:
        #        if self.peek() not in ' \t':
        #            chunks.append(' ')
        #        else:
        #            chunks.append(line_break)
        #else:
        #    chunks.append(line_break)

    # Chomp the tail: False strips the final break, None keeps one
    # break, True keeps every trailing break.
    if chomping is not False:
        pieces.append(line_break)
    if chomping is True:
        pieces.extend(breaks)

    return ScalarToken(''.join(pieces), False, start_mark, end_mark,
            style)
def scan_block_scalar_indicators(self, start_mark):
    """Scan the optional chomping and indentation indicators of a header.

    The two indicators may appear in either order.
    See the specification for details.

    Returns:
        ``(chomping, increment)``: chomping is True for '+', False for
        '-' and None when absent; increment is an int in 1-9 or None.

    Raises:
        ScannerError: for an indentation indicator of 0, or when the
            indicators are followed by an unexpected character.
    """
    chomping = None
    increment = None
    ch = self.peek()
    if ch in '+-':
        # Chomping first, optionally followed by the indentation digit.
        chomping = (ch == '+')
        self.forward()
        ch = self.peek()
        if ch in '0123456789':
            increment = int(ch)
            if increment == 0:
                raise ScannerError("while scanning a block scalar", start_mark,
                        "expected indentation indicator in the range 1-9, but found 0",
                        self.get_mark())
            self.forward()
    elif ch in '0123456789':
        # Indentation digit first, optionally followed by chomping.
        increment = int(ch)
        if increment == 0:
            raise ScannerError("while scanning a block scalar", start_mark,
                    "expected indentation indicator in the range 1-9, but found 0",
                    self.get_mark())
        self.forward()
        ch = self.peek()
        if ch in '+-':
            chomping = (ch == '+')
            self.forward()
    ch = self.peek()
    if ch not in '\0 \r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a block scalar", start_mark,
                "expected chomping or indentation indicators, but found %r"
                % ch, self.get_mark())
    return chomping, increment
def scan_block_scalar_ignored_line(self, start_mark):
    """Consume the rest of a block scalar header line.

    Skips spaces and an optional comment, then the line break.
    See the specification for details.

    Raises:
        ScannerError: if anything other than a comment or a line break
            follows the header.
    """
    while self.peek() == ' ':
        self.forward()
    if self.peek() == '#':
        # Skip the comment text up to the line break.
        while self.peek() not in '\0\r\n\x85\u2028\u2029':
            self.forward()
    leftover = self.peek()
    if leftover not in '\0\r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a block scalar", start_mark,
                "expected a comment or a line break, but found %r" % leftover,
                self.get_mark())
    self.scan_line_break()
def scan_block_scalar_indentation(self):
    """Skip leading spaces and breaks, recording the deepest column seen.

    See the specification for details.

    Returns:
        ``(breaks, max_indent, end_mark)``: the line breaks consumed, the
        largest column reached while skipping spaces, and the mark taken
        after the last line break (or at entry if there was none).
    """
    breaks = []
    deepest = 0
    end_mark = self.get_mark()
    while self.peek() in ' \r\n\x85\u2028\u2029':
        if self.peek() == ' ':
            self.forward()
            if self.column > deepest:
                deepest = self.column
        else:
            breaks.append(self.scan_line_break())
            end_mark = self.get_mark()
    return breaks, deepest, end_mark
def scan_block_scalar_breaks(self, indent):
    """Consume line breaks, skipping up to ``indent`` spaces on each line.

    See the specification for details.

    Returns:
        ``(breaks, end_mark)``: the line breaks consumed and the mark
        taken after the last of them (or at entry if there was none).
    """
    breaks = []
    end_mark = self.get_mark()
    while self.column < indent and self.peek() == ' ':
        self.forward()
    while self.peek() in '\r\n\x85\u2028\u2029':
        breaks.append(self.scan_line_break())
        end_mark = self.get_mark()
        # Skip the indentation of the line that follows.
        while self.column < indent and self.peek() == ' ':
            self.forward()
    return breaks, end_mark
def scan_flow_scalar(self, style):
    """Scan a single- or double-quoted scalar and return its ScalarToken.

    See the specification for details.

    Indentation rules are loosened for quoted scalars: the quotes mark
    the beginning and the end unambiguously, so the scanner is less
    restrictive than the specification requires. The only check left is
    that document separators are not included in the scalar.

    Args:
        style: '"' selects double-quoted rules; any other value is
            scanned with single-quoted rules.
    """
    double = (style == '"')
    pieces = []
    start_mark = self.get_mark()
    quote = self.peek()
    self.forward()
    pieces.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
    # Alternate whitespace and content until the closing quote.
    while self.peek() != quote:
        pieces.extend(self.scan_flow_scalar_spaces(double, start_mark))
        pieces.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
    self.forward()
    end_mark = self.get_mark()
    return ScalarToken(''.join(pieces), False, start_mark, end_mark,
            style)
# Single-character escapes in double-quoted scalars: maps the character
# that follows the backslash to the text it stands for.
ESCAPE_REPLACEMENTS = {
'0': '\0',
'a': '\x07',
'b': '\x08',
't': '\x09',
'\t': '\x09',
'n': '\x0A',
'v': '\x0B',
'f': '\x0C',
'r': '\x0D',
'e': '\x1B',
' ': '\x20',
'\"': '\"',
'\\': '\\',
'/': '/',
'N': '\x85',
'_': '\xA0',
'L': '\u2028',
'P': '\u2029',
}
# Numeric escapes in double-quoted scalars: maps the introducer character
# to the number of hexadecimal digits that must follow it.
ESCAPE_CODES = {
'x': 2,
'u': 4,
'U': 8,
}
def scan_flow_scalar_non_spaces(self, double, start_mark):
    """Scan the non-whitespace part of a quoted scalar.

    Consumes text up to the next unescaped whitespace, line break, end of
    stream or closing quote, decoding quote doubling (single-quoted) and
    backslash escapes (double-quoted) on the way.
    See the specification for details.

    Args:
        double: True for double-quoted rules, False for single-quoted.
        start_mark: mark of the opening quote, used in error reports.

    Returns:
        A list of string chunks.

    Raises:
        ScannerError: in a double-quoted scalar, for an unknown escape
            character, a numeric escape with too few hexadecimal digits,
            or a numeric escape above U+10FFFF.
    """
    chunks = []
    while True:
        # Take the longest run of ordinary characters.
        length = 0
        while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029':
            length += 1
        if length:
            chunks.append(self.prefix(length))
            self.forward(length)
        ch = self.peek()
        if not double and ch == '\'' and self.peek(1) == '\'':
            # '' inside a single-quoted scalar is a literal quote.
            chunks.append('\'')
            self.forward(2)
        elif (double and ch == '\'') or (not double and ch in '\"\\'):
            # The other quote character, or a backslash in single-quoted
            # style, is ordinary text.
            chunks.append(ch)
            self.forward()
        elif double and ch == '\\':
            self.forward()
            ch = self.peek()
            if ch in self.ESCAPE_REPLACEMENTS:
                chunks.append(self.ESCAPE_REPLACEMENTS[ch])
                self.forward()
            elif ch in self.ESCAPE_CODES:
                length = self.ESCAPE_CODES[ch]
                self.forward()
                for k in range(length):
                    if self.peek(k) not in '0123456789ABCDEFabcdef':
                        raise ScannerError("while scanning a double-quoted scalar", start_mark,
                                "expected escape sequence of %d hexdecimal numbers, but found %r" %
                                    (length, self.peek(k)), self.get_mark())
                code = int(self.prefix(length), 16)
                # An eight-digit escape can exceed the Unicode range, and
                # chr() would then raise ValueError/OverflowError; report
                # it as a scanner error with position information.
                if code > 0x10FFFF:
                    raise ScannerError("while scanning a double-quoted scalar", start_mark,
                            "found escape code %#x outside the Unicode range" % code,
                            self.get_mark())
                chunks.append(chr(code))
                self.forward(length)
            elif ch in '\r\n\x85\u2028\u2029':
                # An escaped line break: the break is dropped and only
                # the following breaks are kept.
                self.scan_line_break()
                chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
            else:
                raise ScannerError("while scanning a double-quoted scalar", start_mark,
                        "found unknown escape character %r" % ch, self.get_mark())
        else:
            return chunks
def scan_flow_scalar_spaces(self, double, start_mark):
    """Scan a whitespace run (and any line folding) inside a quoted scalar.

    See the specification for details.

    Returns:
        A list of chunks: the folded line breaks when the run ends in a
        line break, otherwise the whitespace itself.

    Raises:
        ScannerError: if the stream ends inside the scalar.
    """
    chunks = []
    width = 0
    while self.peek(width) in ' \t':
        width += 1
    whitespaces = self.prefix(width)
    self.forward(width)
    ch = self.peek()
    if ch == '\0':
        raise ScannerError("while scanning a quoted scalar", start_mark,
                "found unexpected end of stream", self.get_mark())
    if ch in '\r\n\x85\u2028\u2029':
        # Whitespace before a break is dropped. A lone '\n' folds into a
        # space; further breaks are kept as they are.
        line_break = self.scan_line_break()
        breaks = self.scan_flow_scalar_breaks(double, start_mark)
        if line_break != '\n':
            chunks.append(line_break)
        elif not breaks:
            chunks.append(' ')
        chunks.extend(breaks)
    else:
        chunks.append(whitespaces)
    return chunks
def scan_flow_scalar_breaks(self, double, start_mark):
    """Collect consecutive line breaks inside a quoted scalar.

    Leading spaces and tabs on each line are skipped.
    See the specification for details.

    Returns:
        The list of line breaks consumed.

    Raises:
        ScannerError: if a line starts with a document separator.
    """
    breaks = []
    while True:
        # Document separators are checked instead of indentation.
        head = self.prefix(3)
        if (head == '---' or head == '...') \
                and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
            raise ScannerError("while scanning a quoted scalar", start_mark,
                    "found unexpected document separator", self.get_mark())
        while self.peek() in ' \t':
            self.forward()
        if self.peek() not in '\r\n\x85\u2028\u2029':
            return breaks
        breaks.append(self.scan_line_break())
def scan_plain(self):
    """Scan a plain (unquoted) scalar and return its ScalarToken.

    See the specification for details. On top of it:
      * in the flow context the scalar stops at any of ',?[]{}';
      * the `allow_simple_key` flag is kept up to date here;
      * indentation rules are loosened for the flow context.
    """
    pieces = []
    start_mark = self.get_mark()
    end_mark = start_mark
    indent = self.indent+1
    # Zero indentation is allowed for scalars, so document separators at
    # the beginning of a line must be checked for (`scan_plain_spaces`
    # does that). The stricter alternative was:
    #if indent == 0:
    #    indent = 1
    pending_spaces = []
    while True:
        # A '#' at this position starts a comment, not scalar text.
        if self.peek() == '#':
            break

        # Measure the next run of scalar characters.
        length = 0
        while True:
            ch = self.peek(length)
            if ch in '\0 \t\r\n\x85\u2028\u2029':
                break
            if ch == ':' and self.peek(length+1) in (
                    '\0 \t\r\n\x85\u2028\u2029'
                    + (u',[]{}' if self.flow_level else u'')):
                break
            if self.flow_level and ch in ',?[]{}':
                break
            length += 1
        if length == 0:
            break

        self.allow_simple_key = False
        # Whitespace found after the previous run is only kept now that
        # more scalar text follows it.
        pieces.extend(pending_spaces)
        pieces.append(self.prefix(length))
        self.forward(length)
        end_mark = self.get_mark()
        pending_spaces = self.scan_plain_spaces(indent, start_mark)
        if not pending_spaces or self.peek() == '#' \
                or (not self.flow_level and self.column < indent):
            break
    return ScalarToken(''.join(pieces), True, start_mark, end_mark)
def scan_plain_spaces(self, indent, start_mark):
    """Scan whitespace and line folding between parts of a plain scalar.

    See the specification for details. The specification is really
    confusing about tabs in plain scalars, so they are simply forbidden
    here: only spaces count as whitespace. Do not use tabs in YAML!

    Returns:
        A list of chunks to insert before the next part of the scalar
        (empty when there is no whitespace), or None when a document
        separator follows a line break.
    """
    chunks = []
    width = 0
    while self.peek(width) in ' ':
        width += 1
    whitespaces = self.prefix(width)
    self.forward(width)
    ch = self.peek()
    if ch in '\r\n\x85\u2028\u2029':
        line_break = self.scan_line_break()
        # A line break makes a simple key possible again.
        self.allow_simple_key = True
        head = self.prefix(3)
        if (head == '---' or head == '...') \
                and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
            return
        breaks = []
        while self.peek() in ' \r\n\x85\u2028\u2029':
            if self.peek() == ' ':
                self.forward()
                continue
            breaks.append(self.scan_line_break())
            head = self.prefix(3)
            if (head == '---' or head == '...') \
                    and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
                return
        # A lone '\n' folds into a space; further breaks are kept.
        if line_break != '\n':
            chunks.append(line_break)
        elif not breaks:
            chunks.append(' ')
        chunks.extend(breaks)
    elif whitespaces:
        chunks.append(whitespaces)
    return chunks
def scan_tag_handle(self, name, start_mark):
    """Scan a tag handle: '!', '!!' or '!word!'.

    See the specification for details. For some strange reason the
    specification does not allow '_' in tag handles; it is accepted here
    anyway.

    Args:
        name: what is being scanned, used in error messages.
        start_mark: mark of the enclosing construct, for error reports.

    Raises:
        ScannerError: if the handle does not start with '!' or a named
            handle is not closed by '!'.
    """
    ch = self.peek()
    if ch != '!':
        raise ScannerError("while scanning a %s" % name, start_mark,
                "expected '!', but found %r" % ch, self.get_mark())
    size = 1
    ch = self.peek(size)
    if ch != ' ':
        while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
                or ch in '-_':
            size += 1
            ch = self.peek(size)
        if ch != '!':
            # Advance first so that the error mark points at the
            # offending character.
            self.forward(size)
            raise ScannerError("while scanning a %s" % name, start_mark,
                    "expected '!', but found %r" % ch, self.get_mark())
        size += 1
    handle = self.prefix(size)
    self.forward(size)
    return handle
def scan_tag_uri(self, name, start_mark):
    """Scan the URI part of a tag, decoding %-escapes.

    See the specification for details. The URI is not checked for being
    well-formed.

    Args:
        name: what is being scanned, used in error messages.
        start_mark: mark of the enclosing construct, for error reports.

    Raises:
        ScannerError: if no URI character is found.
    """
    pieces = []
    size = 0
    ch = self.peek(size)
    while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
            or ch in '-;/?:@&=+$,_.!~*\'()[]%':
        if ch == '%':
            # Flush the literal run, then decode the escape sequence.
            pieces.append(self.prefix(size))
            self.forward(size)
            size = 0
            pieces.append(self.scan_uri_escapes(name, start_mark))
        else:
            size += 1
        ch = self.peek(size)
    if size:
        pieces.append(self.prefix(size))
        self.forward(size)
        size = 0
    if not pieces:
        raise ScannerError("while parsing a %s" % name, start_mark,
                "expected URI, but found %r" % ch, self.get_mark())
    return ''.join(pieces)
def scan_uri_escapes(self, name, start_mark):
    """Decode a run of consecutive %XX escapes as UTF-8 text.

    See the specification for details.

    Args:
        name: what is being scanned, used in error messages.
        start_mark: mark of the enclosing construct, for error reports.

    Raises:
        ScannerError: if an escape lacks two hexadecimal digits or the
            decoded bytes are not valid UTF-8.
    """
    octets = []
    mark = self.get_mark()
    while self.peek() == '%':
        self.forward()
        for k in range(2):
            if self.peek(k) not in '0123456789ABCDEFabcdef':
                raise ScannerError("while scanning a %s" % name, start_mark,
                        "expected URI escape sequence of 2 hexdecimal numbers, but found %r"
                        % self.peek(k), self.get_mark())
        octets.append(int(self.prefix(2), 16))
        self.forward(2)
    try:
        return bytes(octets).decode('utf-8')
    except UnicodeDecodeError as exc:
        raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
def scan_line_break(self):
    """Consume one line break and return its normalised form.

    Transforms:
        '\\r\\n'   :   '\\n'
        '\\r'     :   '\\n'
        '\\n'     :   '\\n'
        '\\x85'   :   '\\n'
        '\\u2028' :   '\\u2028'
        '\\u2029' :   '\\u2029'
        default  :   ''   (nothing is consumed)
    """
    ch = self.peek()
    if ch in '\r\n\x85':
        # A CR LF pair counts as a single break.
        width = 2 if self.prefix(2) == '\r\n' else 1
        self.forward(width)
        return '\n'
    if ch in '\u2028\u2029':
        self.forward()
        return ch
    return ''
================================================
FILE: metaflow/_vendor/yaml/serializer.py
================================================
__all__ = ['Serializer', 'SerializerError']
from .error import YAMLError
from .events import *
from .nodes import *
class SerializerError(YAMLError):
    """Raised when the serializer is used in the wrong state.

    The Serializer raises it for operations on a stream that is not
    opened, already opened, or closed.
    """
class Serializer:
    """Turn a representation node graph into a stream of events.

    The class relies on ``emit``, ``resolve``, ``descend_resolver`` and
    ``ascend_resolver`` being supplied by whatever it is combined with;
    none of them is defined here.

    ``closed`` tracks the stream state: None before ``open()``, False
    while open, True after ``close()``.
    """

    # Template for generated anchor names: 'id001', 'id002', ...
    ANCHOR_TEMPLATE = 'id%03d'

    def __init__(self, encoding=None,
            explicit_start=None, explicit_end=None, version=None, tags=None):
        """Store the stream/document options and reset per-document state."""
        self.use_encoding = encoding
        self.use_explicit_start = explicit_start
        self.use_explicit_end = explicit_end
        self.use_version = version
        self.use_tags = tags
        self.serialized_nodes = {}
        self.anchors = {}
        self.last_anchor_id = 0
        self.closed = None

    def open(self):
        """Emit the stream start event; valid exactly once per serializer.

        Raises:
            SerializerError: if the serializer is already opened or closed.
        """
        if self.closed is not None:
            if self.closed:
                raise SerializerError("serializer is closed")
            raise SerializerError("serializer is already opened")
        self.emit(StreamStartEvent(encoding=self.use_encoding))
        self.closed = False

    def close(self):
        """Emit the stream end event; closing twice is a no-op.

        Raises:
            SerializerError: if the serializer was never opened.
        """
        if self.closed is None:
            raise SerializerError("serializer is not opened")
        if not self.closed:
            self.emit(StreamEndEvent())
            self.closed = True

    #def __del__(self):
    #    self.close()

    def serialize(self, node):
        """Emit one complete document for the graph rooted at ``node``.

        Raises:
            SerializerError: if the serializer is not opened or is closed.
        """
        if self.closed is None:
            raise SerializerError("serializer is not opened")
        if self.closed:
            raise SerializerError("serializer is closed")
        self.emit(DocumentStartEvent(explicit=self.use_explicit_start,
            version=self.use_version, tags=self.use_tags))
        # First pass assigns anchors, second pass emits the events.
        self.anchor_node(node)
        self.serialize_node(node, None, None)
        self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
        # Anchors are per document: reset the bookkeeping.
        self.serialized_nodes = {}
        self.anchors = {}
        self.last_anchor_id = 0

    def anchor_node(self, node):
        """Walk the graph and give an anchor to every node seen twice.

        A node seen for the first time is recorded with anchor None and
        its children are visited; a node seen again gets a generated
        anchor unless it already has one.
        """
        if node not in self.anchors:
            self.anchors[node] = None
            if isinstance(node, SequenceNode):
                for child in node.value:
                    self.anchor_node(child)
            elif isinstance(node, MappingNode):
                for child_key, child_value in node.value:
                    self.anchor_node(child_key)
                    self.anchor_node(child_value)
        elif self.anchors[node] is None:
            self.anchors[node] = self.generate_anchor(node)

    def generate_anchor(self, node):
        """Return the next sequential anchor name; ``node`` is unused."""
        self.last_anchor_id += 1
        return self.ANCHOR_TEMPLATE % self.last_anchor_id

    def serialize_node(self, node, parent, index):
        """Emit the events for ``node`` and, recursively, its children.

        A node that was already serialized is emitted as an alias.

        Args:
            node: the node to serialize.
            parent: the containing node, or None for the root.
            index: the position within ``parent``: the sequence index,
                None for a mapping key, the key node for a mapping value.
        """
        alias = self.anchors[node]
        if node in self.serialized_nodes:
            self.emit(AliasEvent(alias))
            return
        self.serialized_nodes[node] = True
        self.descend_resolver(parent, index)
        if isinstance(node, ScalarNode):
            # `implicit` is a pair: does the tag match the one resolved
            # with the first flag set, and the one resolved with the
            # second flag set.
            detected_tag = self.resolve(ScalarNode, node.value, (True, False))
            default_tag = self.resolve(ScalarNode, node.value, (False, True))
            implicit = (node.tag == detected_tag), (node.tag == default_tag)
            self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
                style=node.style))
        elif isinstance(node, SequenceNode):
            implicit = (node.tag
                        == self.resolve(SequenceNode, node.value, True))
            self.emit(SequenceStartEvent(alias, node.tag, implicit,
                flow_style=node.flow_style))
            for position, item in enumerate(node.value):
                self.serialize_node(item, node, position)
            self.emit(SequenceEndEvent())
        elif isinstance(node, MappingNode):
            implicit = (node.tag
                        == self.resolve(MappingNode, node.value, True))
            self.emit(MappingStartEvent(alias, node.tag, implicit,
                flow_style=node.flow_style))
            for key, value in node.value:
                self.serialize_node(key, node, None)
                self.serialize_node(value, node, key)
            self.emit(MappingEndEvent())
        self.ascend_resolver()
================================================
FILE: metaflow/_vendor/yaml/tokens.py
================================================
class Token(object):
    """Base class of all scanner tokens.

    Every token records the marks where it starts and ends. Subclasses
    set a class-level ``id``: a short human-readable label for the token
    kind.
    """

    def __init__(self, start_mark, end_mark):
        self.start_mark = start_mark
        self.end_mark = end_mark

    def __repr__(self):
        # Show every instance attribute except the marks, sorted by name.
        attributes = [key for key in self.__dict__
                if not key.endswith('_mark')]
        attributes.sort()
        arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
                for key in attributes])
        return '%s(%s)' % (self.__class__.__name__, arguments)

# NOTE: the '<...>' ids below had been stripped to empty strings, which
# left these token kinds without any label for code that reads `id`.

#class BOMToken(Token):
#    id = '<byte order mark>'

class DirectiveToken(Token):
    """A '%' directive; ``name`` and ``value`` hold its parsed content."""
    id = '<directive>'
    def __init__(self, name, value, start_mark, end_mark):
        self.name = name
        self.value = value
        self.start_mark = start_mark
        self.end_mark = end_mark

class DocumentStartToken(Token):
    """The '---' document start marker."""
    id = '<document start>'

class DocumentEndToken(Token):
    """The '...' document end marker."""
    id = '<document end>'

class StreamStartToken(Token):
    """Start of the stream; ``encoding`` holds the stream encoding."""
    id = '<stream start>'
    def __init__(self, start_mark=None, end_mark=None,
            encoding=None):
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.encoding = encoding

class StreamEndToken(Token):
    """End of the stream."""
    id = '<stream end>'

class BlockSequenceStartToken(Token):
    """Start of a block sequence."""
    id = '<block sequence start>'

class BlockMappingStartToken(Token):
    """Start of a block mapping."""
    id = '<block mapping start>'

class BlockEndToken(Token):
    """End of a block collection."""
    id = '<block end>'

class FlowSequenceStartToken(Token):
    id = '['

class FlowMappingStartToken(Token):
    id = '{'

class FlowSequenceEndToken(Token):
    id = ']'

class FlowMappingEndToken(Token):
    id = '}'

class KeyToken(Token):
    id = '?'

class ValueToken(Token):
    id = ':'

class BlockEntryToken(Token):
    id = '-'

class FlowEntryToken(Token):
    id = ','

class AliasToken(Token):
    """An alias ('*name'); ``value`` is the name."""
    id = '<alias>'
    def __init__(self, value, start_mark, end_mark):
        self.value = value
        self.start_mark = start_mark
        self.end_mark = end_mark

class AnchorToken(Token):
    """An anchor ('&name'); ``value`` is the name."""
    id = '<anchor>'
    def __init__(self, value, start_mark, end_mark):
        self.value = value
        self.start_mark = start_mark
        self.end_mark = end_mark

class TagToken(Token):
    """A tag; ``value`` is the tag content as scanned."""
    id = '<tag>'
    def __init__(self, value, start_mark, end_mark):
        self.value = value
        self.start_mark = start_mark
        self.end_mark = end_mark

class ScalarToken(Token):
    """A scalar; ``plain`` tells whether it was unquoted, ``style`` is
    the quoting or block style indicator (None when not given)."""
    id = '<scalar>'
    def __init__(self, value, plain, start_mark, end_mark, style=None):
        self.value = value
        self.plain = plain
        self.start_mark = start_mark
        self.end_mark = end_mark
        self.style = style
================================================
FILE: metaflow/_vendor/zipp.LICENSE
================================================
Copyright Jason R. Coombs
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
================================================
FILE: metaflow/_vendor/zipp.py
================================================
import io
import posixpath
import zipfile
import itertools
import contextlib
import sys
import pathlib
# From Python 3.7 on the builtin dict is used as the ordered mapping; older
# interpreters fall back to collections.OrderedDict.
if sys.version_info >= (3, 7):
    OrderedDict = dict
else:
    from collections import OrderedDict
__all__ = ['Path']
def _parents(path):
    """
    Yield every parent of a posixpath.sep-separated path, nearest
    parent first. The path itself is not included.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    # The first ancestry element is the path itself, so it is skipped.
    ancestors = _ancestry(path)
    return itertools.islice(ancestors, 1, None)
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
>>> list(_ancestry('b/d'))
['b/d', 'b']
>>> list(_ancestry('/b/d/'))
['/b/d', '/b']
>>> list(_ancestry('b/d/f/'))
['b/d/f', 'b/d', 'b']
>>> list(_ancestry('b'))
['b']
>>> list(_ancestry(''))
[]
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
yield path
path, tail = posixpath.split(path)
_dedupe = OrderedDict.fromkeys
"""Deduplicate an iterable in original order"""
def _difference(minuend, subtrahend):
"""
Return items in minuend not in subtrahend, retaining order
with O(1) lookup.
"""
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
class CompleteDirs(zipfile.ZipFile):
    """
    ZipFile subclass whose namelist also contains the directories
    that are only implied by the paths of the archive members.
    """

    @staticmethod
    def _implied_dirs(names):
        # Every parent of every name, written as a directory entry with a
        # trailing separator; entries already listed are dropped and the
        # rest are de-duplicated in order.
        as_dirs = (
            parent + posixpath.sep
            for name in names
            for parent in _parents(name)
        )
        return _dedupe(_difference(as_dirs, names))

    def namelist(self):
        listed = super().namelist()
        return listed + list(self._implied_dirs(listed))

    def _name_set(self):
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        Return ``name`` with a trailing slash when only the directory
        form of the name is present in the archive; otherwise return
        ``name`` unchanged.
        """
        known = self._name_set()
        as_dir = name + '/'
        if name not in known and as_dir in known:
            return as_dir
        return name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass instance.

        An existing ZipFile object is converted in place by
        reassigning its ``__class__``.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, zipfile.ZipFile):
            return cls(_pathlib_compat(source))

        # Only allow for FastLookup when supplied zipfile is read-only
        target_cls = cls if 'r' in source.mode else CompleteDirs
        source.__class__ = target_cls
        return source
class FastLookup(CompleteDirs):
    """
    CompleteDirs variant that computes the name list and the name set
    once and then serves them from attributes stored on the instance.
    """

    def namelist(self):
        try:
            return self.__names
        except AttributeError:
            # Nothing stored yet; fall through and compute it.
            pass
        self.__names = super(FastLookup, self).namelist()
        return self.__names

    def _name_set(self):
        try:
            return self.__lookup
        except AttributeError:
            # Nothing stored yet; fall through and compute it.
            pass
        self.__lookup = super(FastLookup, self)._name_set()
        return self.__lookup
def _pathlib_compat(path):
"""
For path-like objects, convert to a filename for compatibility
on Python 3.6.1 and earlier.
"""
try:
return path.__fspath__()
except AttributeError:
return str(path)
class Path:
    """
    A pathlib-compatible interface for zip files.

    The examples below use a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = zipfile.ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'mem/abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('mem/abcde.zip', 'a.txt')
    >>> b
    Path('mem/abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('mem/abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> import os
    >>> str(c).replace(os.sep, posixpath.sep)
    'mem/abcde.zip/b/c.txt'

    At the root, ``name``, ``filename``, and ``parent``
    resolve to the zipfile. Note these attributes are not
    valid and will raise a ``ValueError`` if the zipfile
    has no filename.

    >>> root.name
    'abcde.zip'
    >>> str(root.filename).replace(os.sep, posixpath.sep)
    'mem/abcde.zip'
    >>> str(root.parent)
    'mem'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Build a Path from a ZipFile or a filename; ``at`` is the
        location inside the archive ("" for the archive root).

        Note: When the source is an existing ZipFile object,
        its type (__class__) will be mutated to a
        specialized type. If the caller wishes to retain the
        original type, the caller should either create a
        separate ZipFile object or pass a filename.
        """
        self.root = FastLookup.make(root)
        self.at = at

    def open(self, mode='r', *args, pwd=None, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()``. In text mode the extra arguments
        are passed through to io.TextIOWrapper(); in binary mode
        they are rejected with ``ValueError``.
        """
        if self.is_dir():
            raise IsADirectoryError(self)
        # Only the first mode character is handed to the archive.
        zip_mode = mode[0]
        if not self.exists() and zip_mode == 'r':
            raise FileNotFoundError(self)
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        if 'b' not in mode:
            return io.TextIOWrapper(stream, *args, **kwargs)
        if args or kwargs:
            raise ValueError("encoding args invalid for binary operation")
        return stream

    # The four properties below fall back to the archive's own filename
    # when the value derived from ``at`` is empty.

    @property
    def name(self):
        return pathlib.Path(self.at).name or self.filename.name

    @property
    def suffix(self):
        return pathlib.Path(self.at).suffix or self.filename.suffix

    @property
    def suffixes(self):
        return pathlib.Path(self.at).suffixes or self.filename.suffixes

    @property
    def stem(self):
        return pathlib.Path(self.at).stem or self.filename.stem

    @property
    def filename(self):
        archive_path = pathlib.Path(self.root.filename)
        return archive_path.joinpath(self.at)

    def read_text(self, *args, **kwargs):
        with self.open('r', *args, **kwargs) as stream:
            return stream.read()

    def read_bytes(self):
        with self.open('rb') as stream:
            return stream.read()

    def _is_child(self, path):
        # ``path`` is a child when its directory name equals this location,
        # both compared without trailing slashes.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        return self.__class__(self.root, at)

    def is_dir(self):
        return not self.at or self.at.endswith("/")

    def is_file(self):
        return self.exists() and not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        candidates = map(self._next, self.root.namelist())
        return filter(self._is_child, candidates)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, *other):
        joined = posixpath.join(self.at, *map(_pathlib_compat, other))
        return self._next(self.root.resolve_dir(joined))

    __truediv__ = joinpath

    @property
    def parent(self):
        if not self.at:
            # At the archive root the parent is a filesystem path.
            return self.filename.parent
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
================================================
FILE: metaflow/cards.py
================================================
from metaflow.plugins.cards.card_client import get_cards
from metaflow.plugins.cards.card_modules.card import MetaflowCardComponent, MetaflowCard
from metaflow.plugins.cards.card_modules.components import (
Artifact,
Table,
Image,
Error,
Markdown,
VegaChart,
ProgressBar,
ValueBox,
PythonCode,
EventsTimeline,
JSONViewer,
YAMLViewer,
)
from metaflow.plugins.cards.card_modules.basic import (
DefaultCard,
PageComponent,
ErrorCard,
BlankCard,
)
================================================
FILE: metaflow/cli.py
================================================
import os
import functools
import inspect
import os
import sys
import traceback
from datetime import datetime
import metaflow.tracing as tracing
from metaflow._vendor import click
from . import decorators, lint, metaflow_version, parameters, plugins
from .cli_args import cli_args
from .cli_components.utils import LazyGroup, LazyPluginCommandCollection
from .datastore import FlowDataStore, TaskDataStoreSet
from .debug import debug
from .exception import CommandException, MetaflowException
from .flowspec import FlowStateItems
from .graph import FlowGraph
from .metaflow_config import (
DEFAULT_DATASTORE,
DEFAULT_DECOSPECS,
DEFAULT_ENVIRONMENT,
DEFAULT_EVENT_LOGGER,
DEFAULT_METADATA,
DEFAULT_MONITOR,
DEFAULT_PACKAGE_SUFFIXES,
)
from .metaflow_current import current
from .metaflow_profile import from_start
from metaflow.system import _system_monitor, _system_logger
from .metaflow_environment import MetaflowEnvironment
from .packaging_sys import MetaflowCodeContent
from .plugins import (
DATASTORES,
ENVIRONMENTS,
LOGGING_SIDECARS,
METADATA_PROVIDERS,
MONITOR_SIDECARS,
)
from .pylint_wrapper import PyLint
from .R import metaflow_r_version, use_r
from .util import get_latest_run_id, resolve_identity, decompress_list
from .user_configs.config_options import LocalFileInput, config_options
from .user_configs.config_parameters import ConfigValue
# ANSI escape sequence "erase from cursor to end of line"; echo_always
# appends it to lines and prints it on its own for padding lines.
ERASE_TO_EOL = "\033[K"
# Default color for the *highlighted* spans printed by echo_always.
HIGHLIGHT = "red"
# Prefix added to each line when echo_always is called with indent=True.
INDENT = " " * 4
# Colors used by logger() for the timestamp, the head and "bad" bodies.
LOGGER_TIMESTAMP = "magenta"
LOGGER_COLOR = "green"
LOGGER_BAD_COLOR = "red"
def echo_dev_null(*args, **kwargs):
    """Accept any arguments and print nothing (silent echo replacement)."""
    return None
def echo_always(line, **kwargs):
    """Print ``line`` through ``click.secho`` with Metaflow's formatting.

    Text enclosed in ``*`` is printed with the highlight style. Options
    consumed here: ``wrap``, ``indent``, ``padding_top``, ``padding_bottom``,
    ``highlight``, ``highlight_bold``, ``nl``, ``fg``, ``bold`` and
    ``no_bold``. ``err`` defaults to True. Whatever remains in ``kwargs``
    is forwarded to ``click.secho``.
    """
    if kwargs.pop("wrap", False):
        import textwrap

        # Wrap to 80 columns including the indent, which is applied here so
        # the generic indent step below is switched off.
        prefix = INDENT if kwargs.get("indent", None) else ""
        pieces = textwrap.wrap(
            line, width=80 - len(prefix), break_long_words=False
        )
        line = "\n".join(prefix + piece for piece in pieces)
        kwargs["indent"] = False

    kwargs.setdefault("err", True)
    if kwargs.pop("indent", None):
        line = "\n".join(INDENT + row for row in line.splitlines())
    if kwargs.get("nl", True):
        line += ERASE_TO_EOL

    top = kwargs.pop("padding_top", None)
    bottom = kwargs.pop("padding_bottom", None)
    highlight = kwargs.pop("highlight", HIGHLIGHT)
    if top:
        click.secho(ERASE_TO_EOL, **kwargs)

    hl_bold = kwargs.pop("highlight_bold", True)
    nl = kwargs.pop("nl", True)
    fg = kwargs.pop("fg", None)
    bold = kwargs.pop("bold", False)
    # Spans are printed without newlines; the final newline is sent below.
    kwargs["nl"] = False
    if kwargs.pop("no_bold", False):
        click.secho(line, **kwargs)
    else:
        # Splitting on "*" leaves normal text at even positions and
        # highlighted text at odd positions.
        for position, span in enumerate(line.split("*")):
            if position % 2 == 0:
                kwargs["fg"] = fg
                kwargs["bold"] = bold
            else:
                kwargs["fg"] = highlight
                kwargs["bold"] = hl_bold
            click.secho(span, **kwargs)
    if nl:
        kwargs["nl"] = True
        click.secho("", **kwargs)
    if bottom:
        click.secho(ERASE_TO_EOL, **kwargs)
def logger(body="", system_msg=False, head="", bad=False, timestamp=True, nl=True):
    """Print one log line: optional timestamp, optional head, then body.

    ``timestamp`` may be True (use the current time), a falsy value (print
    no timestamp) or an object providing ``strftime``. ``system_msg`` prints
    the body in bold and ``bad`` prints it in LOGGER_BAD_COLOR. ``nl``
    controls the trailing newline after the body.
    """
    if timestamp:
        moment = datetime.now() if timestamp is True else timestamp
        # Drop the last three digits of the microseconds (millisecond precision).
        stamp = moment.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        click.secho(stamp + " ", fg=LOGGER_TIMESTAMP, nl=False)
    if head:
        click.secho(head, fg=LOGGER_COLOR, nl=False)
    body_color = LOGGER_BAD_COLOR if bad else None
    click.secho(body, bold=system_msg, fg=body_color, nl=nl)
# Command group holding the built-in top-level commands. The commands
# defined in this file attach themselves with @cli.command; the entries of
# `lazy_subcommands` map a command name to the dotted path of its
# implementation (presumably imported on demand by LazyGroup -- confirm in
# cli_components.utils). This comment is deliberately not a docstring: click
# falls back to a callback's docstring for its help text.
@click.group(
    cls=LazyGroup,
    lazy_subcommands={
        "init": "metaflow.cli_components.init_cmd.init",
        "dump": "metaflow.cli_components.dump_cmd.dump",
        "step": "metaflow.cli_components.step_cmd.step",
        "run": "metaflow.cli_components.run_cmds.run",
        "resume": "metaflow.cli_components.run_cmds.resume",
        "spin": "metaflow.cli_components.run_cmds.spin",
        "spin-step": "metaflow.cli_components.step_cmd.spin_step",
    },
)
def cli(ctx):
    pass
@cli.command(help="Check that the flow is valid (default).")
@click.option(
    "--warnings/--no-warnings",
    default=False,
    show_default=True,
    help="Show all Pylint warnings, not just errors.",
)
@click.pass_obj
def check(obj, warnings=False):
    # Validate the flow (graph lint plus optional Pylint) and then print
    # hints about the most common follow-up commands. All output is
    # suppressed when the CLI runs in quiet mode.
    echo = echo_dev_null if obj.is_quiet else echo_always
    _check(
        echo,
        obj.graph,
        obj.flow,
        obj.environment,
        pylint=obj.pylint,
        warnings=warnings,
    )
    # The hints refer to the file that defines the flow class.
    flow_file = inspect.getfile(obj.flow.__class__)
    hints = (
        "\n*'{cmd} show'* shows a description of this flow.\n"
        "*'{cmd} run'* runs the flow locally.\n"
        "*'{cmd} help'* shows all available commands and options.\n"
    )
    echo(hints.format(cmd=flow_file), highlight="magenta", highlight_bold=False)
@cli.command(help="Show structure of the flow.")
@click.pass_obj
def show(obj):
    # Print the flow description followed by one section per step, in the
    # graph's sorted order: attached decorators and wrappers, the step name,
    # its doc (or "?") and, for every step except the end step, the steps
    # it transitions to.
    graph = obj.graph
    echo_always("\n%s" % graph.doc)
    for step_name in graph.sorted_nodes:
        echo_always("")
        step = graph[step_name]
        for deco in step.decorators:
            echo_always("@%s" % deco.name, err=False)
        for wrapper in step.wrappers:
            echo_always("@%s" % wrapper.decorator_name, err=False)
        echo_always("Step *%s*" % step.name, err=False)
        echo_always(step.doc if step.doc else "?", indent=True, err=False)
        if step.type == "end":
            continue
        successors = ", ".join("*%s*" % target for target in step.out_funcs)
        echo_always(
            "*=>* %s" % successors,
            indent=True,
            highlight="magenta",
            highlight_bold=False,
            err=False,
        )
    echo_always("")
@cli.command(help="Show all available commands.")
@click.pass_context
def help(ctx):
    # The help text is taken from the parent context, not from this
    # command's own context.
    parent_ctx = ctx.parent
    print(parent_ctx.get_help())
@cli.command(help="Output internal state of the flow graph.")
@click.option("--json", is_flag=True, help="Output the flow graph in JSON format.")
@click.pass_obj
def output_raw(obj, json):
    # Print the flow graph either as its str() form or, with --json, as a
    # JSON document with the keys "graph" and "graph_structure". The
    # headline goes through echo_always' default stream, the payload is
    # printed with err=False.
    if not json:
        _graph = str(obj.graph)
        _msg = "Internal representation of the flow:"
    else:
        # The `json` flag shadows the module name, hence the aliased import.
        import json as _json

        _msg = "Internal representation of the flow in JSON format:"
        _graph_dict, _graph_struct = obj.graph.output_steps()
        _payload = {"graph": _graph_dict, "graph_structure": _graph_struct}
        _graph = _json.dumps(_payload, indent=4)
    echo_always(_msg, fg="magenta", bold=False)
    echo_always(_graph, err=False)
@cli.command(help="Visualize the flow with Graphviz.")
@click.pass_obj
def output_dot(obj):
    # Two explanatory lines go through echo_always' default stream; the
    # graph description itself is printed with err=False so that it can be
    # piped into `dot`.
    echo_always("Visualizing the flow as a GraphViz graph", fg="magenta", bold=False)
    usage_hint = (
        "Try piping the output to 'dot -Tpng -o graph.png' to produce "
        "an actual image."
    )
    echo_always(usage_hint, indent=True)
    dot_source = obj.graph.output_dot()
    echo_always(dot_source, err=False)
@cli.command(help="Print the Metaflow version")
@click.pass_obj
def version(obj):
    # Prints the version stored on the CLI state object (set by `start`).
    current_version = obj.version
    echo_always(current_version)
# NOTE: add_decorator_options should be TL because it checks to make sure
# that no option conflict with the ones below
@decorators.add_decorator_options
@config_options
@click.command(
    cls=LazyPluginCommandCollection,
    sources=[cli],
    lazy_sources=plugins.get_plugin_cli_path(),
    invoke_without_command=True,
)
# Quiet is eager to make sure it is available when processing --config options since
# we need it to construct a context to pass to any DeployTimeField for the default
# value.
@click.option(
    "--quiet/--not-quiet",
    show_default=True,
    default=False,
    help="Suppress unnecessary messages",
    is_eager=True,
)
@click.option(
    "--metadata",
    default=DEFAULT_METADATA,
    show_default=True,
    type=click.Choice([m.TYPE for m in METADATA_PROVIDERS]),
    help="Metadata service type",
)
@click.option(
    "--environment",
    default=DEFAULT_ENVIRONMENT,
    show_default=True,
    type=click.Choice(["local"] + [m.TYPE for m in ENVIRONMENTS]),
    help="Execution environment type",
)
@click.option(
    "--force-rebuild-environments/--no-force-rebuild-environments",
    is_flag=True,
    default=False,
    hidden=True,
    type=bool,
    help="Explicitly rebuild the execution environments",
)
# See comment for --quiet
@click.option(
    "--datastore",
    default=DEFAULT_DATASTORE,
    show_default=True,
    type=click.Choice([d.TYPE for d in DATASTORES]),
    help="Data backend type",
    is_eager=True,
)
@click.option("--datastore-root", help="Root path for datastore")
@click.option(
    "--package-suffixes",
    help="A comma-separated list of file suffixes to include in the code package.",
    default=DEFAULT_PACKAGE_SUFFIXES,
    show_default=True,
)
@click.option(
    "--with",
    "decospecs",
    multiple=True,
    help="Add a decorator to all steps. You can specify this option "
    "multiple times to attach multiple decorators in steps.",
)
@click.option(
    "--pylint/--no-pylint",
    default=True,
    show_default=True,
    help="Run Pylint on the flow if pylint is installed.",
)
@click.option(
    "--event-logger",
    default=DEFAULT_EVENT_LOGGER,
    show_default=True,
    type=click.Choice(LOGGING_SIDECARS),
    help="type of event logger used",
)
@click.option(
    "--monitor",
    default=DEFAULT_MONITOR,
    show_default=True,
    type=click.Choice(MONITOR_SIDECARS),
    help="Monitoring backend type",
)
@click.option(
    "--local-config-file",
    type=LocalFileInput(exists=True, readable=True, dir_okay=False, resolve_path=True),
    required=False,
    default=None,
    help="A filename containing the dumped configuration values. Internal use only.",
    hidden=True,
    is_eager=True,
)
@click.option(
    "--mode",
    type=click.Choice(["spin"]),
    default=None,
    help="Execution mode for metaflow CLI commands. Use 'spin' to enable "
    "spin metadata and spin datastore for executions",
)
@click.pass_context
def start(
    ctx,
    quiet=False,
    metadata=None,
    environment=None,
    force_rebuild_environments=False,
    datastore=None,
    datastore_root=None,
    decospecs=None,
    package_suffixes=None,
    pylint=None,
    event_logger=None,
    monitor=None,
    local_config_file=None,
    config=None,
    config_value=None,
    mode=None,
    **deco_options
):
    # Top-level entry point of the flow CLI. It populates ctx.obj (the state
    # object passed in by `main`) with everything the subcommands need: echo
    # functions, datastore, environment, sidecars, metadata provider, graph
    # and decorators. When no subcommand was given it runs `check`.
    #
    # This description is a comment rather than a docstring on purpose: no
    # explicit help text is given for this command and click falls back to
    # the callback's docstring for it.
    if quiet:
        echo = echo_dev_null
    else:
        echo = echo_always

    ctx.obj.version = metaflow_version.get_version()
    version = ctx.obj.version
    if use_r():
        version = metaflow_r_version()

    from_start("MetaflowCLI: Starting")
    echo("Metaflow %s" % version, fg="magenta", bold=True, nl=False)
    echo(" executing *%s*" % ctx.obj.flow.name, fg="magenta", nl=False)
    echo(" for *%s*" % resolve_identity(), fg="magenta")

    # Register the distribution finder when the code content provides one
    dist_info = MetaflowCodeContent.get_distribution_finder()
    if dist_info:
        sys.meta_path.append(dist_info)

    # Setup the context
    cli_args._set_top_kwargs(ctx.params)
    ctx.obj.echo = echo
    ctx.obj.echo_always = echo_always
    ctx.obj.is_quiet = quiet
    ctx.obj.logger = logger
    ctx.obj.pylint = pylint
    ctx.obj.check = functools.partial(_check, echo)
    ctx.obj.top_cli = cli
    ctx.obj.package_suffixes = package_suffixes.split(",")
    ctx.obj.spin_mode = mode == "spin"

    ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == datastore][0]

    if datastore_root is None:
        datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
            ctx.obj.echo
        )
    if datastore_root is None:
        raise CommandException(
            "Could not find the location of the datastore -- did you correctly set the "
            "METAFLOW_DATASTORE_SYSROOT_%s environment variable?" % datastore.upper()
        )

    ctx.obj.datastore_impl.datastore_root = datastore_root

    FlowDataStore.default_storage_impl = ctx.obj.datastore_impl

    # At this point, we are able to resolve the user-configuration options so we can
    # process all those decorators that the user added that will modify the flow based
    # on those configurations. It is important to do this as early as possible since it
    # actually modifies the flow itself

    # When we process the options, the first one processed will return None and the
    # second one processed will return the actual options. The order of processing
    # depends on what (and in what order) the user specifies on the command line.
    config_options = config or config_value

    if (
        hasattr(ctx, "saved_args")
        and ctx.saved_args
        and ctx.saved_args[0] == "resume"
        and getattr(ctx.obj, "has_config_options", False)
    ):
        # In the case of resume, we actually need to load the configurations
        # from the resumed run to process them. This can be slightly onerous so check
        # if we need to in the first place
        if getattr(ctx.obj, "has_cl_config_options", False):
            raise click.UsageError(
                "Cannot specify --config or --config-value with 'resume'"
            )
        # We now load the config artifacts from the original run id
        run_id = None
        try:
            idx = ctx.saved_args.index("--origin-run-id")
        except ValueError:
            idx = -1
        if idx >= 0:
            run_id = ctx.saved_args[idx + 1]
        else:
            run_id = get_latest_run_id(ctx.obj.echo, ctx.obj.flow.name)
        if run_id is None:
            raise CommandException(
                "A previous run id was not found. Specify --origin-run-id."
            )
        # We get the name of the parameters we need to load from the datastore -- these
        # are accessed using the *variable* name and not necessarily the *parameter* name
        config_var_names = []
        config_param_names = []
        for name, param in ctx.obj.flow._get_parameters():
            if not param.IS_CONFIG_PARAMETER:
                continue
            config_var_names.append(name)
            config_param_names.append(param.name)

        # We just need a task datastore that will be thrown away -- we do this so
        # we don't have to create the logger, monitor, etc.
        debug.userconf_exec("Loading config parameters from run %s" % run_id)
        for d in TaskDataStoreSet(
            FlowDataStore(ctx.obj.flow.name),
            run_id,
            steps=["_parameters"],
            prefetch_data_artifacts=config_var_names,
        ):
            param_ds = d

        # We can now set the CONFIGS value in the flow properly. This will overwrite
        # anything that may have been passed in by default and we will use exactly what
        # the original flow had. Note that these are accessed through the parameter name
        # We need to save the "plain-ness" flag to carry it over
        config_plain_flags = {
            k: v[1] for k, v in ctx.obj.flow._flow_state[FlowStateItems.CONFIGS].items()
        }
        ctx.obj.flow._flow_state[FlowStateItems.CONFIGS].clear()
        d = ctx.obj.flow._flow_state[FlowStateItems.CONFIGS]
        for param_name, var_name in zip(config_param_names, config_var_names):
            val = param_ds[var_name]
            debug.userconf_exec("Loaded config %s as: %s" % (param_name, val))
            d[param_name] = (val, config_plain_flags[param_name])

    elif getattr(ctx.obj, "delayed_config_exception", None):
        # If we are not doing a resume, any exception we had parsing configs needs to
        # be raised. For resume, since we ignore those options, we ignore the error.
        raise ctx.obj.delayed_config_exception

    # Init all values in the flow mutators and then process them
    for decorator in ctx.obj.flow._flow_mutators:
        decorator.external_init()

    new_cls = ctx.obj.flow._process_config_decorators(config_options)
    if new_cls:
        ctx.obj.flow = new_cls(use_cli=False)

    ctx.obj.graph = ctx.obj.flow._graph

    ctx.obj.environment = [
        e for e in ENVIRONMENTS + [MetaflowEnvironment] if e.TYPE == environment
    ][0](ctx.obj.flow)
    # set force rebuild flag for environments that support it.
    ctx.obj.environment._force_rebuild = force_rebuild_environments
    ctx.obj.environment.validate_environment(ctx.obj.logger, datastore)
    ctx.obj.event_logger = LOGGING_SIDECARS[event_logger](
        flow=ctx.obj.flow, env=ctx.obj.environment
    )
    ctx.obj.monitor = MONITOR_SIDECARS[monitor](
        flow=ctx.obj.flow, env=ctx.obj.environment
    )
    ctx.obj.metadata = [m for m in METADATA_PROVIDERS if m.TYPE == metadata][0](
        ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
    )

    ctx.obj.flow_datastore = FlowDataStore(
        ctx.obj.flow.name,
        ctx.obj.environment,
        ctx.obj.metadata,
        ctx.obj.event_logger,
        ctx.obj.monitor,
    )

    ctx.obj.config_options = config_options
    ctx.obj.is_spin = False
    ctx.obj.skip_decorators = False

    # Override values for spin steps, or if we are in spin mode
    if (
        hasattr(ctx, "saved_args")
        and ctx.saved_args
        and "spin" in ctx.saved_args[0]
        or ctx.obj.spin_mode
    ):
        # To minimize side effects for spin, we will only use the following:
        # - local metadata provider,
        # - local datastore,
        # - local environment,
        # - null event logger,
        # - null monitor
        ctx.obj.is_spin = True
        # This block can also be entered through --mode spin alone, in which
        # case saved_args may be missing or empty; treat that as "no flag"
        # instead of failing on the membership test.
        if "--skip-decorators" in (getattr(ctx, "saved_args", None) or []):
            ctx.obj.skip_decorators = True

        ctx.obj.event_logger = LOGGING_SIDECARS["nullSidecarLogger"](
            flow=ctx.obj.flow, env=ctx.obj.environment
        )
        ctx.obj.monitor = MONITOR_SIDECARS["nullSidecarMonitor"](
            flow=ctx.obj.flow, env=ctx.obj.environment
        )
        # Use spin metadata, spin datastore, and spin datastore root
        ctx.obj.metadata = [m for m in METADATA_PROVIDERS if m.TYPE == "spin"][0](
            ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
        )
        ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == "spin"][0]
        datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
            ctx.obj.echo, create_on_absent=True
        )
        ctx.obj.datastore_impl.datastore_root = datastore_root
        ctx.obj.flow_datastore = FlowDataStore(
            ctx.obj.flow.name,
            ctx.obj.environment,  # Same environment as run/resume
            ctx.obj.metadata,  # local metadata
            ctx.obj.event_logger,  # null event logger
            ctx.obj.monitor,  # null monitor
            storage_impl=ctx.obj.datastore_impl,
        )

    # Start event logger and monitor
    ctx.obj.event_logger.start()
    _system_logger.init_system_logger(ctx.obj.flow.name, ctx.obj.event_logger)

    ctx.obj.monitor.start()
    _system_monitor.init_system_monitor(ctx.obj.flow.name, ctx.obj.monitor)

    decorators._init(ctx.obj.flow)

    # It is important to initialize flow decorators early as some of the
    # things they provide may be used by some of the objects initialized after.
    decorators._init_flow_decorators(
        ctx.obj.flow,
        ctx.obj.graph,
        ctx.obj.environment,
        ctx.obj.flow_datastore,
        ctx.obj.metadata,
        ctx.obj.logger,
        echo,
        deco_options,
        ctx.obj.is_spin,
        ctx.obj.skip_decorators,
    )

    # In the case of run/resume/spin, we will want to apply the TL decospecs
    # *after* the run decospecs so that they don't take precedence. In other
    # words, for the same decorator, we want `myflow.py run --with foo` to
    # take precedence over any other `foo` decospec

    # Note that top-level decospecs are used primarily with non run/resume
    # options as well as with the airflow/argo/sfn integrations which pass
    # all the decospecs (the ones from top-level but also the ones from the
    # run/resume level) through the tl decospecs.
    ctx.obj.tl_decospecs = list(decospecs or [])

    # initialize current and parameter context for deploy-time parameters
    current._set_env(flow=ctx.obj.flow, is_running=False)
    parameters.set_parameter_context(
        ctx.obj.flow.name,
        ctx.obj.echo,
        ctx.obj.flow_datastore,
        {
            k: v if plain_flag or v is None else ConfigValue(v)
            for k, (v, plain_flag) in ctx.obj.flow.__class__._flow_state[
                FlowStateItems.CONFIGS
            ].items()
        },
    )

    if (
        hasattr(ctx, "saved_args")
        and ctx.saved_args
        and ctx.saved_args[0] not in ("run", "resume", "spin")
    ):
        # run/resume/spin are special cases because they can add more decorators with --with,
        # so they have to take care of themselves.
        all_decospecs = ctx.obj.tl_decospecs + list(
            ctx.obj.environment.decospecs() or []
        )

        # The spin-step case has to be tested first: "spin-step" is neither
        # "step" nor "init", so testing it after the default-decospecs
        # condition would make this branch unreachable.
        if ctx.saved_args[0] == "spin-step":
            # If we are in spin-step, we will not attach any decorators
            all_decospecs = []
        elif ctx.saved_args[0] not in ("step", "init"):
            # We add the default decospecs for everything except init and step since in those
            # cases, the decospecs will already have been handled by either a run/resume
            # or a scheduler setting them up in their own way.
            all_decospecs += DEFAULT_DECOSPECS.split()

        if all_decospecs:
            decorators._attach_decorators(ctx.obj.flow, all_decospecs)
            decorators._init(ctx.obj.flow)
            # Regenerate graph if we attached more decorators
            ctx.obj.flow.__class__._init_graph()
            ctx.obj.graph = ctx.obj.flow._graph

        decorators._init_step_decorators(
            ctx.obj.flow,
            ctx.obj.graph,
            ctx.obj.environment,
            ctx.obj.flow_datastore,
            ctx.obj.logger,
            # The last two arguments are only used for spin steps
            ctx.obj.is_spin,
            ctx.obj.skip_decorators,
        )

        # Check the graph again (mutators may have changed it)
        ctx.obj.graph = ctx.obj.flow._graph

    # TODO (savin): Enable lazy instantiation of package
    ctx.obj.package = None

    if ctx.invoked_subcommand is None:
        ctx.invoke(check)
def _check(echo, graph, flow, environment, pylint=True, warnings=False, **kwargs):
    """Validate a flow and report the outcome through ``echo``.

    The graph is always run through ``lint.linter`` (``kwargs`` are passed
    to ``run_checks``). When ``pylint`` is truthy and Pylint is available,
    the file defining the flow class is analysed as well, using the
    environment's pylint configuration; ``warnings`` is forwarded to that
    run.
    """
    echo("Validating your flow...", fg="magenta", bold=False)
    # TODO set linter settings
    lint.linter.run_checks(graph, **kwargs)
    echo("The graph looks good!", fg="green", bold=True, indent=True)
    if not pylint:
        return

    echo("Running pylint...", fg="magenta", bold=False)
    flow_file = inspect.getfile(flow.__class__)
    checker = PyLint(flow_file)
    if not checker.has_pylint():
        echo(
            "Pylint not found, so extra checks are disabled.",
            fg="green",
            indent=True,
            bold=False,
        )
        return

    # Pylint's own output always goes through echo_always, even when `echo`
    # is the silent variant.
    is_happy, failure_msg = checker.run(
        warnings=warnings,
        pylint_config=environment.pylint_config(),
        logger=echo_always,
    )
    if is_happy:
        echo("Pylint is happy!", fg="green", bold=True, indent=True)
    else:
        echo(
            "Pylint couldn't analyze your code.\n\tPylint exception: %s"
            % failure_msg,
            fg="red",
            bold=True,
            indent=True,
        )
        echo("Skipping Pylint checks.", fg="red", bold=True, indent=True)
def print_metaflow_exception(ex):
    """Print a Metaflow exception as headline, location and message.

    The headline is printed without a newline so that the location (source
    file and line number, each included only when it is not None) follows
    on the same line and ends with a colon. The message is printed indented
    underneath, with bottom padding.
    """
    echo_always(ex.headline, indent=True, nl=False, bold=True)
    parts = []
    if ex.source_file is not None:
        parts.append(" in file %s" % ex.source_file)
    if ex.line_no is not None:
        parts.append(" on line %d" % ex.line_no)
    parts.append(":")
    echo_always("".join(parts), bold=True)
    echo_always(ex.message, indent=True, bold=False, padding_bottom=True)
def print_unknown_exception(ex):
    """Print an "Internal error" headline followed by a traceback.

    The traceback comes from ``traceback.format_exc()``, i.e. from the
    exception currently being handled; ``ex`` itself is not used.
    """
    echo_always("Internal error", indent=True, bold=True)
    trace_text = traceback.format_exc()
    echo_always(trace_text, highlight=None, highlight_bold=False)
class CliState:
    """State object handed to click as ``obj``; initially holds only the flow.

    Further attributes are attached to it by ``main`` and ``start``.
    """

    def __init__(self, flow):
        self.flow = flow
def main(flow, args=None, handle_exceptions=True, entrypoint=None):
    """Run the Metaflow CLI for ``flow``.

    With ``args=None`` the ``start`` command is invoked without explicit
    arguments. With explicit ``args``, a ``SystemExit`` raised by the
    command is caught and its exit code is returned. When
    ``handle_exceptions`` is true, errors are printed and the process exits
    with status 1; otherwise they propagate to the caller. The monitor and
    event logger attached to the CLI state, if any, are always terminated.
    ``entrypoint`` defaults to ``[sys.executable, sys.argv[0]]``.
    """
    # Ignore warning(s) and prevent spamming the end-user.
    # TODO: This serves as a short term workaround for RuntimeWarning(s) thrown
    # in py3.8 related to log buffering (bufsize=1).
    import warnings

    warnings.filterwarnings("ignore")

    state = CliState(flow)
    state.entrypoint = (
        [sys.executable, sys.argv[0]] if entrypoint is None else entrypoint
    )

    try:
        if args is not None:
            try:
                start(args=args, obj=state, auto_envvar_prefix="METAFLOW")
            except SystemExit as exit_request:
                return exit_request.code
        else:
            start(auto_envvar_prefix="METAFLOW", obj=state)
    except MetaflowException as x:
        if not handle_exceptions:
            raise
        print_metaflow_exception(x)
        sys.exit(1)
    except Exception as x:
        if not handle_exceptions:
            raise
        print_unknown_exception(x)
        sys.exit(1)
    finally:
        # `start` may have failed before attaching the sidecars to the state.
        if getattr(state, "monitor", None) is not None:
            state.monitor.terminate()
        if getattr(state, "event_logger", None) is not None:
            state.event_logger.terminate()
================================================
FILE: metaflow/cli_args.py
================================================
# This class provides a global singleton `cli_args` which stores the `top` and
# `step` level options for the metaflow CLI. This allows decorators to have
# access to the CLI options instead of relying (solely) on the click context.
# TODO: We have two CLIArgs:
# - this one, which captures the top level and step-level options passed to the
# step command and is used primarily for UBF to replicate the exact command
# line passed
# - one in runtime.py which is used to construct the step command and modified by
# runtime_step_cli. Both are similar in nature and should be unified in some way
#
# TODO: dict_to_cli_options uses shlex which causes some issues with this as
# well as the converting of options in runtime.py. We should make it so that we
# can properly shlex things and un-shlex when using. Ideally this should all be
# done in one place.
#
# NOTE: There is an important difference between these two as well:
# - this one will include local_config_file whereas the other one WILL NOT.
# This is because this is used when constructing the parallel UBF command which
# executes locally and therefore needs the local_config_file but the other (remote)
# commands do not.
from .user_configs.config_options import ConfigInput
from .util import to_unicode
class CLIArgs(object):
    """Container for the top-level and step-level CLI options.

    The two option dictionaries are stored through the `_set_*_kwargs`
    setters and can be turned back into a `step` command line with
    `step_command`.
    """

    def __init__(self):
        self._top_kwargs = {}
        self._step_kwargs = {}

    def _set_step_kwargs(self, kwargs):
        """Remember the options given to the `step` sub-command."""
        self._step_kwargs = kwargs

    def _set_top_kwargs(self, kwargs):
        """Remember the options given before the sub-command."""
        self._top_kwargs = kwargs

    @property
    def top_kwargs(self):
        return self._top_kwargs

    @property
    def step_kwargs(self):
        return self._step_kwargs

    def step_command(
        self, executable, script, step_name, top_kwargs=None, step_kwargs=None
    ):
        """Build the argument list `<executable> -u <script> <top> step <name> <step>`.

        When `top_kwargs` or `step_kwargs` is None, the stored value is used.
        """
        top = self._top_kwargs if top_kwargs is None else top_kwargs
        step = self._step_kwargs if step_kwargs is None else step_kwargs
        return [
            executable,
            "-u",
            script,
            *self._options(top),
            "step",
            step_name,
            *self._options(step),
        ]

    @staticmethod
    def _options(mapping):
        """Yield the command-line tokens that correspond to `mapping`."""
        for name, raw in mapping.items():
            # Only None and the literal False are dropped; other falsy values
            # such as 0 are legitimate option values.
            if raw is None or raw is False:
                continue

            if name in ("config", "config_value"):
                # All configs are gathered under one option (named either way
                # depending on click processing order) but have to be sent one
                # at a time using --config-value.
                for config_name in raw.keys():
                    yield "--config-value"
                    yield to_unicode(config_name)
                    yield to_unicode(ConfigInput.make_key_name(config_name))
                continue

            # 'with' is a reserved keyword in Python, so click exposes it as
            # 'decospecs'; map it back when emitting the flag.
            flag = "--" + ("with" if name == "decospecs" else name.replace("_", "-"))
            values = raw if isinstance(raw, (list, tuple, set)) else [raw]
            for item in values:
                yield flag
                # Boolean flags carry no value token.
                if not isinstance(item, bool):
                    yield to_unicode(item)


cli_args = CLIArgs()
================================================
FILE: metaflow/cli_components/__init__.py
================================================
================================================
FILE: metaflow/cli_components/dump_cmd.py
================================================
import pickle
from metaflow._vendor import click
from ..cli import echo_always, echo_dev_null
from ..datastore import TaskDataStoreSet
from ..exception import CommandException
@click.command(
    help="Get data artifacts of a task or all tasks in a step. "
    "The format for input-path is either <run_id>/<step_name> or "
    "<run_id>/<step_name>/<task_id>."
)
@click.argument("input-path")
@click.option(
    "--private/--no-private",
    default=False,
    show_default=True,
    help="Show also private attributes.",
)
@click.option(
    "--max-value-size",
    default=1000,
    show_default=True,
    type=int,
    help="Show only values that are smaller than this number. "
    "Set to 0 to see only keys.",
)
@click.option(
    "--include",
    type=str,
    default="",
    help="Include only artifacts in the given comma-separated list.",
)
@click.option(
    "--file", type=str, default=None, help="Serialize artifacts in the given file."
)
@click.pass_obj
def dump(obj, input_path, private=None, max_value_size=None, include=None, file=None):
    """Print, or pickle to `file`, the artifacts of one task or of all tasks of a step.

    Parameters
    ----------
    obj
        CLI state object injected by click; `is_quiet` and `flow_datastore`
        are read from it.
    input_path : str
        Either `run_id/step_name` or `run_id/step_name/task_id`.
    private : bool
        Forwarded as `show_private` to the datastore formatting helpers.
    max_value_size : int
        Size limit for displayed values; ignored (set to None) when `file`
        is given.
    include : str
        Comma-separated artifact names; empty entries are dropped.
    file : str, optional
        When set, the artifacts are pickled into this file instead of printed.

    Raises
    ------
    CommandException
        If `input_path` does not have two or three `/`-separated components.
    """
    if obj.is_quiet:
        echo = echo_dev_null
    else:
        echo = echo_always

    output = {}
    kwargs = {
        "show_private": private,
        # No size limit when serializing to a file.
        "max_value_size": None if file is not None else max_value_size,
        "include": {t for t in include.split(",") if t},
    }

    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
    parts = input_path.split("/")
    if len(parts) == 2:
        run_id, step_name = parts
        task_id = None
    elif len(parts) == 3:
        run_id, step_name, task_id = parts
    else:
        raise CommandException(
            "input_path should either be run_id/step_name or run_id/step_name/task_id"
        )

    datastore_set = TaskDataStoreSet(
        obj.flow_datastore,
        run_id,
        steps=[step_name],
        prefetch_data_artifacts=kwargs.get("include"),
    )
    if task_id:
        ds_list = [datastore_set.get_with_pathspec(input_path)]
    else:
        ds_list = list(datastore_set)  # get all tasks

    for ds in ds_list:
        echo(
            "Dumping output of run_id=*{run_id}* "
            "step=*{step}* task_id=*{task_id}*".format(
                run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id
            ),
            fg="magenta",
        )

        if file is None:
            # The artifacts themselves are printed even in quiet mode.
            echo_always(
                ds.format(**kwargs), highlight="green", highlight_bold=False, err=False
            )
        else:
            output[ds.pathspec] = ds.to_dict(**kwargs)

    if file is not None:
        with open(file, "wb") as f:
            pickle.dump(output, f, protocol=pickle.HIGHEST_PROTOCOL)
        echo("Artifacts written to *%s*" % file)
================================================
FILE: metaflow/cli_components/init_cmd.py
================================================
from metaflow._vendor import click
from .. import parameters
from ..runtime import NativeRuntime
@parameters.add_custom_parameters(deploy_mode=False)
@click.command(help="Internal command to initialize a run.", hidden=True)
@click.option(
    "--run-id",
    default=None,
    required=True,
    help="ID for one execution of all steps in the flow.",
)
@click.option(
    "--task-id", default=None, required=True, help="ID for this instance of the step."
)
@click.option(
    "--tag",
    "tags",
    multiple=True,
    default=None,
    help="Tags for this instance of the step.",
)
@click.pass_obj
def init(obj, run_id=None, task_id=None, tags=None, **kwargs):
    """Set the flow constants from the CLI parameters and persist them for `task_id`.

    `obj` is the CLI state object injected by click. `kwargs` holds the
    remaining options, which are handed to `obj.flow._set_constants`.
    """
    # init is a separate command instead of an option in 'step'
    # since we need to capture user-specified parameters with
    # @add_custom_parameters. Adding custom parameters to 'step'
    # is not desirable due to the possibility of name clashes between
    # user-specified parameters and our internal options. Note that
    # user-specified parameters are often defined as environment
    # variables.

    obj.metadata.add_sticky_tags(tags=tags)

    # Decorator hooks are skipped: this runtime is only used to persist constants.
    runtime = NativeRuntime(
        obj.flow,
        obj.graph,
        obj.flow_datastore,
        obj.metadata,
        obj.environment,
        obj.package,
        obj.logger,
        obj.entrypoint,
        obj.event_logger,
        obj.monitor,
        run_id=run_id,
        skip_decorator_hooks=True,
    )
    obj.flow._set_constants(obj.graph, kwargs, obj.config_options)
    runtime.persist_constants(task_id=task_id)
================================================
FILE: metaflow/cli_components/run_cmds.py
================================================
import json
from functools import wraps
from metaflow._vendor import click
from .. import decorators, namespace, parameters, tracing
from ..exception import CommandException
from ..graph import FlowGraph
from ..metaflow_current import current
from ..metaflow_config import (
DEFAULT_DECOSPECS,
FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
SPIN_PERSIST,
)
from ..metaflow_profile import from_start
from ..package import MetaflowPackage
from ..runtime import NativeRuntime, SpinRuntime
from ..system import _system_logger
# from ..client.core import Run
from ..tagging_util import validate_tags
from ..util import get_latest_run_id, write_latest_run_id, parse_spin_pathspec
def before_run(obj, tags, decospecs, skip_decorators=False):
    """Prepare the CLI state object `obj` before a run, resume or spin.

    Validates `tags`, attaches the `--with` decorators (unless
    `skip_decorators` is set), checks the graph, initializes the step
    decorators, registers `tags` as sticky tags and creates `obj.package`.
    `obj.graph` is refreshed from `obj.flow._graph` along the way.
    """
    validate_tags(tags)

    # There's a --with option both at the top-level and for the run/resume/spin
    # subcommand. Why?
    #
    # "run --with shoes" looks so much better than "--with shoes run".
    # This is a very common use case of --with.
    #
    # A downside is that we need to have the following decorators handling
    # in two places in this module and make sure _init_step_decorators
    # doesn't get called twice.

    # We want the order to be the following:
    # - run level decospecs
    # - top level decospecs
    # - environment decospecs
    from_start(
        f"Inside before_run, skip_decorators={skip_decorators}, is_spin={obj.is_spin}"
    )
    if not skip_decorators:
        all_decospecs = (
            list(decospecs or [])
            + obj.tl_decospecs
            + list(obj.environment.decospecs() or [])
        )
        if all_decospecs:
            # These decospecs are the ones from run/resume/spin PLUS the ones from the
            # environment (for example the @conda)
            decorators._attach_decorators(obj.flow, all_decospecs)
            decorators._init(obj.flow)
            # Regenerate graph if we attached more decorators
            obj.flow.__class__._init_graph()
            obj.graph = obj.flow._graph

    obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
    # obj.environment.init_environment(obj.logger)

    # skip_decorators is forwarded here, so this call happens in both modes.
    decorators._init_step_decorators(
        obj.flow,
        obj.graph,
        obj.environment,
        obj.flow_datastore,
        obj.logger,
        obj.is_spin,
        skip_decorators,
    )
    # Re-read graph since it may have been modified by mutators
    obj.graph = obj.flow._graph

    obj.metadata.add_sticky_tags(tags=tags)

    # Package working directory only once per run.
    # We explicitly avoid doing this in `start` since it is invoked for every
    # step in the run.
    obj.package = MetaflowPackage(
        obj.flow,
        obj.environment,
        obj.echo,
        suffixes=obj.package_suffixes,
        flow_datastore=obj.flow_datastore if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE else None,
    )
def common_runner_options(func):
    """Decorate `func` with the `--run-id-file` and `--runner-attribute-file` options."""
    # Listed in the order they are declared; applied bottom-up below, exactly
    # as stacked decorators would be.
    option_decorators = [
        click.option(
            "--run-id-file",
            default=None,
            show_default=True,
            type=str,
            help="Write the ID of this run to the file specified.",
        ),
        click.option(
            "--runner-attribute-file",
            default=None,
            show_default=True,
            type=str,
            help="Write the metadata and pathspec of this run to the file specified. Used internally "
            "for Metaflow's Runner API.",
        ),
    ]

    @wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    for add_option in reversed(option_decorators):
        wrapper = add_option(wrapper)
    return wrapper
def write_file(file_path, content):
    """Write `str(content)` to `file_path` as UTF-8; do nothing when `file_path` is None."""
    if file_path is None:
        return
    text = str(content)
    with open(file_path, "w", encoding="utf-8") as handle:
        handle.write(text)
def config_callback(ctx, param, value):
    """Click callback appending the default decospecs to the `--with` values.

    The command-line values in `value` come first, followed by the
    whitespace-separated entries of DEFAULT_DECOSPECS
    (METAFLOW_DEFAULT_DECOSPECS=... in the environment or configuration).
    The result is always a tuple.
    """
    # This function gets called even if there is no option passed on the
    # command line.
    # NOTE: Assumes that ctx.auto_envvar_prefix is set to METAFLOW (same as in
    # from_conf)
    default_specs = tuple(DEFAULT_DECOSPECS.split())
    return tuple(value) + default_specs
def common_run_options(func):
    """Decorate `func` with the options shared by the `run` and `resume` commands."""
    # Listed in the order they are declared; applied bottom-up below, exactly
    # as stacked decorators would be.
    option_decorators = [
        click.option(
            "--tag",
            "tags",
            multiple=True,
            default=None,
            help="Annotate this run with the given tag. You can specify "
            "this option multiple times to attach multiple tags in "
            "the run.",
        ),
        click.option(
            "--max-workers",
            default=16,
            show_default=True,
            help="Maximum number of parallel processes.",
        ),
        click.option(
            "--max-num-splits",
            default=100,
            show_default=True,
            help="Maximum number of splits allowed in a foreach. This "
            "is a safety check preventing bugs from triggering "
            "thousands of steps inadvertently.",
        ),
        click.option(
            "--max-log-size",
            default=10,
            show_default=True,
            help="Maximum size of stdout and stderr captured in "
            "megabytes. If a step outputs more than this to "
            "stdout/stderr, its output will be truncated.",
        ),
        click.option(
            "--with",
            "decospecs",
            multiple=True,
            help="Add a decorator to all steps. You can specify this "
            "option multiple times to attach multiple decorators "
            "in steps.",
            callback=config_callback,
        ),
    ]

    @wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    for add_option in reversed(option_decorators):
        wrapper = add_option(wrapper)
    return wrapper
@click.option(
    "--origin-run-id",
    default=None,
    help="ID of the run that should be resumed. By default, the "
    "last run executed locally.",
)
@click.option(
    "--run-id",
    default=None,
    help="Run ID for the new run. By default, a new run-id will be generated",
    hidden=True,
)
@click.option(
    "--clone-only/--no-clone-only",
    default=False,
    show_default=True,
    help="Only clone tasks without continuing execution",
    hidden=True,
)
@click.option(
    "--reentrant/--no-reentrant",
    default=False,
    show_default=True,
    hidden=True,
    help="If specified, allows this call to be called in parallel",
)
@click.option(
    "--resume-identifier",
    default=None,
    show_default=True,
    hidden=True,
    help="If specified, it identifies the task that started this resume call. It is in the form of {step_name}-{task_id}",
)
@click.argument("step-to-rerun", required=False)
@click.command(help="Resume execution of a previous run of this flow.")
@tracing.cli("cli/resume")
@common_run_options
@common_runner_options
@click.pass_obj
def resume(
    obj,
    tags=None,
    step_to_rerun=None,
    origin_run_id=None,
    run_id=None,
    clone_only=False,
    reentrant=False,
    max_workers=None,
    max_num_splits=None,
    max_log_size=None,
    decospecs=None,
    run_id_file=None,
    resume_identifier=None,
    runner_attribute_file=None,
):
    """Resume a previous run, optionally re-running from `step_to_rerun`.

    When `origin_run_id` is not given, the latest local run id of the flow is
    used. The tasks of the origin run are cloned and, unless `clone_only` is
    set, execution continues afterwards.

    Raises
    ------
    CommandException
        If no origin run id can be determined, if `step_to_rerun` is not a
        node of the current graph, or if `run_id` parses as an integer.
    """
    before_run(obj, tags, decospecs)

    if origin_run_id is None:
        origin_run_id = get_latest_run_id(obj.echo, obj.flow.name)
        if origin_run_id is None:
            raise CommandException(
                "A previous run id was not found. Specify --origin-run-id."
            )

    if step_to_rerun is None:
        steps_to_rerun = set()
    else:
        # validate step name
        if step_to_rerun not in obj.graph.nodes:
            raise CommandException(
                "invalid step name {0} specified, must be step present in "
                "current form of execution graph. Valid step names include: {1}".format(
                    step_to_rerun, ",".join(list(obj.graph.nodes.keys()))
                )
            )

        ## TODO: instead of checking execution path here, can add a warning later
        ## instead of throwing an error. This is for resuming a step which was not
        ## taken inside a branch i.e. not present in the execution path.

        # origin_run = Run(f"{obj.flow.name}/{origin_run_id}", _namespace_check=False)
        # executed_steps = {step.path_components[-1] for step in origin_run}
        # if step_to_rerun not in executed_steps:
        #     raise CommandException(
        #         f"Cannot resume from step '{step_to_rerun}'. This step was not "
        #         f"part of the original execution path for run '{origin_run_id}'."
        #     )

        steps_to_rerun = {step_to_rerun}

    if run_id:
        # Run-ids that are provided by the metadata service are always integers.
        # External providers or run-ids (like external schedulers) always need to
        # be non-integers to avoid any clashes. This condition ensures this.
        try:
            int(run_id)
        except ValueError:
            # Not an integer: this is the accepted case. Only ValueError is
            # caught so that KeyboardInterrupt/SystemExit are not swallowed.
            pass
        else:
            raise CommandException("run-id %s cannot be an integer" % run_id)

    runtime = NativeRuntime(
        obj.flow,
        obj.graph,
        obj.flow_datastore,
        obj.metadata,
        obj.environment,
        obj.package,
        obj.logger,
        obj.entrypoint,
        obj.event_logger,
        obj.monitor,
        run_id=run_id,
        clone_run_id=origin_run_id,
        clone_only=clone_only,
        reentrant=reentrant,
        steps_to_rerun=steps_to_rerun,
        max_workers=max_workers,
        max_num_splits=max_num_splits,
        # The option is expressed in megabytes; the runtime receives bytes.
        max_log_size=max_log_size * 1024 * 1024,
        resume_identifier=resume_identifier,
    )
    write_file(run_id_file, runtime.run_id)
    runtime.print_workflow_info()

    runtime.persist_constants()

    if runner_attribute_file:
        with open(runner_attribute_file, "w", encoding="utf-8") as f:
            json.dump(
                {
                    "run_id": runtime.run_id,
                    "flow_name": obj.flow.name,
                    "metadata": obj.metadata.metadata_str(),
                },
                f,
            )

    # We may skip clone-only resume if this is not a resume leader,
    # and clone is already complete.
    if runtime.should_skip_clone_only_execution():
        return

    current._update_env(
        {
            "run_id": runtime.run_id,
        }
    )
    _system_logger.log_event(
        level="info",
        module="metaflow.resume",
        name="start",
        payload={
            "msg": "Resuming run",
        },
    )

    with runtime.run_heartbeat():
        if clone_only:
            runtime.clone_original_run()
        else:
            runtime.clone_original_run(generate_task_obj=True, verbose=False)
            runtime.execute()
@parameters.add_custom_parameters(deploy_mode=True)
@click.command(help="Run the workflow locally.")
@tracing.cli("cli/run")
@common_run_options
@common_runner_options
@click.option(
    "--namespace",
    "user_namespace",
    default=None,
    help="Change namespace from the default (your username) to "
    "the specified tag. Note that this option does not alter "
    "tags assigned to the objects produced by this run, just "
    "what existing objects are visible in the client API. You "
    "can enable the global namespace with an empty string: "
    "--namespace=",
)
@click.pass_obj
def run(
    obj,
    tags=None,
    max_workers=None,
    max_num_splits=None,
    max_log_size=None,
    decospecs=None,
    run_id_file=None,
    runner_attribute_file=None,
    user_namespace=None,
    **kwargs,
):
    """Execute the flow locally.

    `kwargs` holds the remaining options (the flow's custom parameters are
    added by `add_custom_parameters`); they are handed to
    `obj.flow._set_constants`. The run id is recorded as the latest run id
    and, if requested, written to `run_id_file` / `runner_attribute_file`.
    """
    if user_namespace is not None:
        # An empty string selects the global namespace, i.e. namespace(None).
        namespace(user_namespace or None)
    before_run(obj, tags, decospecs)

    runtime = NativeRuntime(
        obj.flow,
        obj.graph,
        obj.flow_datastore,
        obj.metadata,
        obj.environment,
        obj.package,
        obj.logger,
        obj.entrypoint,
        obj.event_logger,
        obj.monitor,
        max_workers=max_workers,
        max_num_splits=max_num_splits,
        # The option is expressed in megabytes; the runtime receives bytes.
        max_log_size=max_log_size * 1024 * 1024,
    )
    write_latest_run_id(obj, runtime.run_id)
    write_file(run_id_file, runtime.run_id)

    obj.flow._set_constants(obj.graph, kwargs, obj.config_options)
    current._update_env(
        {
            "run_id": runtime.run_id,
        }
    )
    _system_logger.log_event(
        level="info",
        module="metaflow.run",
        name="start",
        payload={
            "msg": "Starting run",
        },
    )

    runtime.print_workflow_info()
    runtime.persist_constants()
    if runner_attribute_file:
        with open(runner_attribute_file, "w", encoding="utf-8") as f:
            json.dump(
                {
                    "run_id": runtime.run_id,
                    "flow_name": obj.flow.name,
                    "metadata": obj.metadata.metadata_str(),
                },
                f,
            )
    with runtime.run_heartbeat():
        runtime.execute()
# @parameters.add_custom_parameters(deploy_mode=True)
@click.command(help="Spins up a task for a given step from a previous run locally.")
@tracing.cli("cli/spin")
@click.argument("pathspec")
@click.option(
    "--skip-decorators/--no-skip-decorators",
    is_flag=True,
    # Default False matches the saved_args check in cli.py for spin steps - skip_decorators
    # only becomes True when explicitly passed, otherwise decorators are applied by default
    default=False,
    show_default=True,
    help="Skip decorators attached to the step or flow.",
)
@click.option(
    "--artifacts-module",
    default=None,
    show_default=True,
    help="Path to a module that contains artifacts to be used in the spun step. "
    "The artifacts should be defined as a dictionary called ARTIFACTS with keys as "
    "the artifact names and values as the artifact values. The artifact values will "
    "overwrite the default values of the artifacts used in the spun step.",
)
@click.option(
    "--persist/--no-persist",
    "persist",
    default=SPIN_PERSIST,
    show_default=True,
    help="Whether to persist the artifacts in the spun step. If set to False, "
    "the artifacts will not be persisted and will not be available in the spun step's "
    "datastore.",
)
@click.option(
    "--max-log-size",
    default=10,
    show_default=True,
    help="Maximum size of stdout and stderr captured in "
    "megabytes. If a step outputs more than this to "
    "stdout/stderr, its output will be truncated.",
)
@common_runner_options
@click.pass_obj
def spin(
    obj,
    pathspec,
    persist=True,
    artifacts_module=None,
    skip_decorators=False,
    max_log_size=None,
    run_id_file=None,
    runner_attribute_file=None,
    **kwargs,
):
    """Re-execute locally the step identified by `pathspec` from a previous run.

    The step name and the full pathspec are obtained from
    `parse_spin_pathspec`. The spin run id is recorded as the latest run id
    and, if requested, written to `run_id_file` / `runner_attribute_file`.

    Raises
    ------
    CommandException
        If the flow has no attribute named after the parsed step name.
    """
    # Parse the pathspec argument to extract step name and full pathspec
    step_name, parsed_pathspec = parse_spin_pathspec(pathspec, obj.flow.name)

    before_run(obj, [], [], skip_decorators)
    obj.echo(f"Spinning up step *{step_name}* locally for flow *{obj.flow.name}*")
    # For spin, flow parameters come from the original run, but _set_constants
    # requires them in kwargs. Use parameter defaults as placeholders - they'll be
    # overwritten when the spin step loads artifacts from the original run.
    flow_param_defaults = {}
    for var, param in obj.flow._get_parameters():
        if not param.IS_CONFIG_PARAMETER:
            default_value = param.kwargs.get("default")
            # Use None for required parameters without defaults
            flow_param_defaults[param.name.replace("-", "_").lower()] = default_value
    obj.flow._set_constants(obj.graph, flow_param_defaults, obj.config_options)
    step_func = getattr(obj.flow, step_name, None)
    if step_func is None:
        raise CommandException(
            f"Step '{step_name}' not found in flow '{obj.flow.name}'. "
            "Please provide a valid step name."
        )
    from_start("Spin: before spin runtime init")
    spin_runtime = SpinRuntime(
        obj.flow,
        obj.graph,
        obj.flow_datastore,
        obj.metadata,
        obj.environment,
        obj.package,
        obj.logger,
        obj.entrypoint,
        obj.event_logger,
        obj.monitor,
        step_func,
        step_name,
        parsed_pathspec,
        skip_decorators,
        artifacts_module,
        persist,
        # The option is expressed in megabytes; the runtime receives bytes.
        max_log_size * 1024 * 1024,
    )
    write_latest_run_id(obj, spin_runtime.run_id)
    write_file(run_id_file, spin_runtime.run_id)
    # We only need the root for the metadata, i.e. the portion before DATASTORE_LOCAL_DIR
    datastore_root = spin_runtime._flow_datastore._storage_impl.datastore_root
    orig_task_metadata_root = datastore_root.rsplit("/", 1)[0]
    from_start("Spin: going to execute")
    spin_runtime.execute()
    from_start("Spin: after spin runtime execute")

    if runner_attribute_file:
        # Opened as UTF-8 explicitly, like the attribute files written by the
        # run and resume commands, instead of the platform default encoding.
        with open(runner_attribute_file, "w", encoding="utf-8") as f:
            json.dump(
                {
                    "task_id": spin_runtime.task.task_id,
                    "step_name": step_name,
                    "run_id": spin_runtime.run_id,
                    "flow_name": obj.flow.name,
                    # Store metadata in a format that can be used by the Runner API
                    "metadata": f"{obj.metadata.__class__.TYPE}@{orig_task_metadata_root}",
                },
                f,
            )
================================================
FILE: metaflow/cli_components/step_cmd.py
================================================
from metaflow._vendor import click
from .. import namespace
from ..cli import echo_always, echo_dev_null
from ..cli_args import cli_args
from ..datastore.flow_datastore import FlowDataStore
from ..exception import CommandException
from ..client.filecache import FileCache, FileBlobCache, TaskMetadataCache
from ..metaflow_config import SPIN_ALLOWED_DECORATORS
from ..metaflow_profile import from_start
from ..plugins import DATASTORES
from ..task import MetaflowTask
from ..unbounded_foreach import UBF_CONTROL, UBF_TASK
from ..util import decompress_list, read_artifacts_module
import metaflow.tracing as tracing
@click.command(help="Internal command to execute a single task.", hidden=True)
@tracing.cli("cli/step")
@click.argument("step-name")
@click.option(
    "--run-id",
    default=None,
    required=True,
    help="ID for one execution of all steps in the flow.",
)
@click.option(
    "--task-id",
    default=None,
    required=True,
    show_default=True,
    help="ID for this instance of the step.",
)
@click.option(
    "--input-paths",
    help="A comma-separated list of pathspecs specifying inputs for this step.",
)
@click.option(
    "--input-paths-filename",
    type=click.Path(exists=True, readable=True, dir_okay=False, resolve_path=True),
    help="A filename containing the argument typically passed to `input-paths`",
    hidden=True,
)
@click.option(
    "--split-index",
    type=int,
    default=None,
    show_default=True,
    help="Index of this foreach split.",
)
@click.option(
    "--tag",
    "opt_tag",
    multiple=True,
    default=None,
    help="Annotate this run with the given tag. You can specify "
    "this option multiple times to attach multiple tags in "
    "the task.",
)
@click.option(
    "--namespace",
    "opt_namespace",
    default=None,
    help="Change namespace from the default (your username) to the specified tag.",
)
@click.option(
    "--retry-count",
    default=0,
    help="How many times we have attempted to run this task.",
)
@click.option(
    "--max-user-code-retries",
    default=0,
    help="How many times we should attempt running the user code.",
)
@click.option(
    "--clone-only",
    default=None,
    help="Pathspec of the origin task for this task to clone. Do "
    "not execute anything.",
)
@click.option(
    "--clone-run-id",
    default=None,
    help="Run id of the origin flow, if this task is part of a flow being resumed.",
)
@click.option(
    "--ubf-context",
    default="none",
    type=click.Choice(["none", UBF_CONTROL, UBF_TASK]),
    help="Provides additional context if this task is of type unbounded foreach.",
)
@click.option(
    "--num-parallel",
    default=0,
    type=int,
    help="Number of parallel instances of a step. Ignored in local mode (see parallel decorator code).",
)
@click.pass_context
def step(
    ctx,
    step_name,
    opt_tag=None,
    run_id=None,
    task_id=None,
    input_paths=None,
    input_paths_filename=None,
    split_index=None,
    opt_namespace=None,
    retry_count=None,
    max_user_code_retries=None,
    clone_only=None,
    clone_run_id=None,
    ubf_context="none",
    num_parallel=None,
):
    """Execute (or clone, when `clone_only` is set) a single task of `step_name`.

    The step-level options are recorded in the `cli_args` singleton before
    the task is created. Input paths are taken from `input_paths` or, when
    that is empty, read from `input_paths_filename`.

    Raises
    ------
    CommandException
        If the flow has no attribute `step_name`, or that attribute is not a
        step.
    """
    if ctx.obj.is_quiet:
        echo = echo_dev_null
    else:
        echo = echo_always

    # The CLI spells "no unbounded foreach context" as the string "none".
    if ubf_context == "none":
        ubf_context = None
    if opt_namespace is not None:
        namespace(opt_namespace)

    try:
        func = getattr(ctx.obj.flow, step_name)
    except Exception:
        # Any lookup failure is reported as a missing step; process-control
        # exceptions (KeyboardInterrupt, SystemExit) are left to propagate.
        raise CommandException("Step *%s* doesn't exist." % step_name)
    # A flow attribute that is not a step (for example `name`) has no
    # `is_step` marker at all; treat that the same as a false marker instead
    # of failing with AttributeError.
    if not getattr(func, "is_step", False):
        raise CommandException("Function *%s* is not a step." % step_name)
    echo("Executing a step, *%s*" % step_name, fg="magenta", bold=False)

    step_kwargs = ctx.params
    # Remove argument `step_name` from `step_kwargs`.
    step_kwargs.pop("step_name", None)
    # Remove `opt_*` prefix from (some) option keys.
    step_kwargs = dict(
        [(k[4:], v) if k.startswith("opt_") else (k, v) for k, v in step_kwargs.items()]
    )
    cli_args._set_step_kwargs(step_kwargs)

    ctx.obj.metadata.add_sticky_tags(tags=opt_tag)
    if not input_paths and input_paths_filename:
        with open(input_paths_filename, mode="r", encoding="utf-8") as f:
            # Strip surrounding whitespace and quote characters from the content.
            input_paths = f.read().strip(" \n\"'")

    paths = decompress_list(input_paths) if input_paths else []

    task = MetaflowTask(
        ctx.obj.flow,
        ctx.obj.flow_datastore,
        ctx.obj.metadata,
        ctx.obj.environment,
        ctx.obj.echo,
        ctx.obj.event_logger,
        ctx.obj.monitor,
        ubf_context,
    )
    if clone_only:
        task.clone_only(
            step_name,
            run_id,
            task_id,
            clone_only,
            retry_count,
        )
    else:
        task.run_step(
            step_name,
            run_id,
            task_id,
            clone_run_id,
            paths,
            split_index,
            retry_count,
            max_user_code_retries,
        )

    echo("Success", fg="green", bold=True, indent=True)
@click.command(help="Internal command to spin a single task.", hidden=True)
@click.argument("step-name")
@click.option(
    "--run-id",
    default=None,
    required=True,
    help="Original run ID for the step that will be spun",
)
@click.option(
    "--task-id",
    default=None,
    required=True,
    help="Original Task ID for the step that will be spun",
)
@click.option(
    "--orig-flow-datastore",
    show_default=True,
    help="Original datastore for the flow from which a task is being spun",
)
@click.option(
    "--input-paths",
    help="A comma-separated list of pathspecs specifying inputs for this step.",
)
@click.option(
    "--split-index",
    type=int,
    default=None,
    show_default=True,
    help="Index of this foreach split.",
)
@click.option(
    "--retry-count",
    default=0,
    help="How many times we have attempted to run this task.",
)
@click.option(
    "--max-user-code-retries",
    default=0,
    help="How many times we should attempt running the user code.",
)
@click.option(
    "--namespace",
    "opt_namespace",
    default=None,
    help="Change namespace from the default (your username) to the specified tag.",
)
@click.option(
    "--skip-decorators/--no-skip-decorators",
    is_flag=True,
    default=False,
    show_default=True,
    help="Skip decorators attached to the step or flow.",
)
@click.option(
    "--persist/--no-persist",
    "persist",
    default=True,
    show_default=True,
    help="Whether to persist the artifacts in the spun step. If set to false, the artifacts will not"
    " be persisted and will not be available in the spun step's datastore.",
)
@click.option(
    "--artifacts-module",
    default=None,
    show_default=True,
    help="Path to a module that contains artifacts to be used in the spun step. The artifacts should "
    "be defined as a dictionary called ARTIFACTS with keys as the artifact names and values as the "
    "artifact values. The artifact values will overwrite the default values of the artifacts used in "
    "the spun step.",
)
@click.pass_context
def spin_step(
    ctx,
    step_name,
    orig_flow_datastore,
    run_id=None,
    task_id=None,
    input_paths=None,
    split_index=None,
    retry_count=None,
    max_user_code_retries=None,
    opt_namespace=None,
    skip_decorators=False,
    artifacts_module=None,
    persist=True,
):
    """Run a single spun task of `step_name` against the original run's datastore.

    `orig_flow_datastore` has the form `<type>@<root>`; the matching entry of
    DATASTORES is used to build a FlowDataStore for the original flow, which
    is wired to a FileCache before being handed to the task.

    Raises
    ------
    CommandException
        If `orig_flow_datastore` is missing or has no `@`, or if its type does
        not match any entry of DATASTORES.
    """
    if ctx.obj.is_quiet:
        echo = echo_dev_null
    else:
        echo = echo_always

    if opt_namespace is not None:
        namespace(opt_namespace)

    input_paths = decompress_list(input_paths) if input_paths else []

    # With --skip-decorators nothing is whitelisted; otherwise only the
    # decorators listed in SPIN_ALLOWED_DECORATORS are.
    whitelist_decorators = [] if skip_decorators else SPIN_ALLOWED_DECORATORS
    from_start("SpinStep: initialized decorators")
    spin_artifacts = read_artifacts_module(artifacts_module) if artifacts_module else {}
    from_start("SpinStep: read artifacts module")

    # The option is not declared as required, so it can be None here; report a
    # malformed value as a command error rather than failing during unpacking.
    if not orig_flow_datastore or "@" not in orig_flow_datastore:
        raise CommandException(
            "--orig-flow-datastore must be of the form <type>@<root>, got: %r"
            % (orig_flow_datastore,)
        )
    # Split on the first '@' only so that a root containing '@' stays intact.
    ds_type, ds_root = orig_flow_datastore.split("@", 1)
    matching_impls = [d for d in DATASTORES if d.TYPE == ds_type]
    if not matching_impls:
        raise CommandException(
            "Unknown datastore type *%s* in --orig-flow-datastore." % ds_type
        )
    orig_datastore_impl = matching_impls[0]
    orig_datastore_impl.datastore_root = ds_root
    orig_flow_datastore = FlowDataStore(
        ctx.obj.flow.name,
        environment=None,
        storage_impl=orig_datastore_impl,
        ds_root=ds_root,
    )

    filecache = FileCache()
    orig_flow_datastore.set_metadata_cache(
        TaskMetadataCache(filecache, ds_type, ds_root, ctx.obj.flow.name)
    )
    orig_flow_datastore.ca_store.set_blob_cache(
        FileBlobCache(
            filecache, FileCache.flow_ds_id(ds_type, ds_root, ctx.obj.flow.name)
        )
    )

    task = MetaflowTask(
        ctx.obj.flow,
        ctx.obj.flow_datastore,
        ctx.obj.metadata,
        ctx.obj.environment,
        echo,
        ctx.obj.event_logger,
        ctx.obj.monitor,
        None,  # no unbounded foreach context
        orig_flow_datastore=orig_flow_datastore,
        spin_artifacts=spin_artifacts,
    )
    from_start("SpinStep: initialized task")
    task.run_step(
        step_name,
        run_id,
        task_id,
        None,
        input_paths,
        split_index,
        retry_count,
        max_user_code_retries,
        whitelist_decorators,
        persist,
    )
    from_start("SpinStep: ran step")
================================================
FILE: metaflow/cli_components/utils.py
================================================
import importlib
from metaflow._vendor import click
from metaflow.extension_support.plugins import get_plugin
class LazyPluginCommandCollection(click.CommandCollection):
    """Command collection whose plugin-provided sources are loaded on first use.

    Besides the regular sources of a click CommandCollection, `lazy_sources`
    names groups that are resolved through `get_plugin` only when commands
    are listed or looked up; resolved groups are cached in `_lazy_loaded`.
    """

    # lazy_source should only point to things that are resolved as CLI plugins.

    def __init__(self, *args, lazy_sources=None, **kwargs):
        super().__init__(*args, **kwargs)
        # lazy_sources is a dict of strings in the form
        # "{plugin_name}" -> "{module-name}.{command-object-name}"
        self.lazy_sources = lazy_sources or {}
        # Cache of already resolved groups, keyed by plugin name.
        self._lazy_loaded = {}

    def invoke(self, ctx):
        """Invoke the base command first, then resolve and invoke the subcommand."""
        # NOTE: This is copied from MultiCommand.invoke. The change is that we
        # behave like chain in the sense that we evaluate the subcommand *after*
        # invoking the base command but we don't chain the commands like self.chain
        # would otherwise indicate.
        # The goal of this is to make sure that the first command is properly executed
        # *first* prior to loading the other subcommands. It's more a lazy_subcommand_load
        # than a chain.
        # Look for CHANGE HERE in this code to see where the changes are made.
        # If click is updated, this may also need to be updated. This version is for
        # click 7.1.2.
        def _process_result(value):
            if self.result_callback is not None:
                value = ctx.invoke(self.result_callback, value, **ctx.params)
            return value

        if not ctx.protected_args:
            # If we are invoked without command the chain flag controls
            # how this happens.  If we are not in chain mode, the return
            # value here is the return value of the command.
            # If however we are in chain mode, the return value is the
            # return value of the result processor invoked with an empty
            # list (which means that no subcommand actually was executed).
            if self.invoke_without_command:
                # CHANGE HERE: We behave like self.chain = False here

                # if not self.chain:
                return click.Command.invoke(self, ctx)
                # with ctx:
                #    click.Command.invoke(self, ctx)
                #    return _process_result([])

            ctx.fail("Missing command.")

        # Fetch args back out
        args = ctx.protected_args + ctx.args
        ctx.args = []
        ctx.protected_args = []
        # CHANGE HERE: Add saved_args so we have access to it in the command to be
        # able to infer what we are calling next
        ctx.saved_args = args

        # If we're not in chain mode, we only allow the invocation of a
        # single command but we also inform the current context about the
        # name of the command to invoke.
        # CHANGE HERE: We change this block to do the invoke *before* the resolve_command
        # Make sure the context is entered so we do not clean up
        # resources until the result processor has worked.
        with ctx:
            # The subcommand name is not known yet at this point, so "*" is
            # used as a placeholder whenever there are remaining args.
            ctx.invoked_subcommand = "*" if args else None
            click.Command.invoke(self, ctx)
            cmd_name, cmd, args = self.resolve_command(ctx, args)
            sub_ctx = cmd.make_context(cmd_name, args, parent=ctx)
            with sub_ctx:
                return _process_result(sub_ctx.command.invoke(sub_ctx))

        # CHANGE HERE: Removed all the part of chain mode.

    def list_commands(self, ctx):
        """Return the base command names followed by those of every lazy source.

        Note that this resolves all lazy sources.
        """
        base = super().list_commands(ctx)
        for source_name, source in self.lazy_sources.items():
            subgroup = self._lazy_load(source_name, source)
            base.extend(subgroup.list_commands(ctx))
        return base

    def get_command(self, ctx, cmd_name):
        """Look `cmd_name` up in the regular sources first, then in each lazy source.

        Returns None when no source provides the command.
        """
        base_cmd = super().get_command(ctx, cmd_name)
        if base_cmd is not None:
            return base_cmd
        for source_name, source in self.lazy_sources.items():
            subgroup = self._lazy_load(source_name, source)
            cmd = subgroup.get_command(ctx, cmd_name)
            if cmd is not None:
                return cmd
        return None

    def _lazy_load(self, source_name, source_path):
        """Resolve (once) and return the click.Group registered as `source_name`.

        Raises ValueError when the resolved object is not a click.Group.
        """
        if source_name in self._lazy_loaded:
            return self._lazy_loaded[source_name]
        cmd_object = get_plugin("cli", source_path, source_name)
        if not isinstance(cmd_object, click.Group):
            raise ValueError(
                f"Lazy loading of {source_name} failed by returning "
                "a non-group object"
            )
        self._lazy_loaded[source_name] = cmd_object
        return cmd_object
class LazyGroup(click.Group):
    """
    A click group whose sub-commands are imported only when first requested.
    """

    def __init__(self, *args, lazy_subcommands=None, **kwargs):
        super().__init__(*args, **kwargs)
        # lazy_subcommands maps a command name to its import path:
        # "{command}" -> "{module-name}.{command-object-name}"
        self.lazy_subcommands = lazy_subcommands if lazy_subcommands else {}
        # Cache of already-imported command objects, keyed by command name.
        self._lazy_loaded = {}

    def list_commands(self, ctx):
        """Eagerly registered commands first, then the lazy ones sorted by name."""
        eager = super().list_commands(ctx)
        return eager + sorted(self.lazy_subcommands.keys())

    def get_command(self, ctx, cmd_name):
        """Return the command, importing it if it is declared as lazy."""
        if cmd_name not in self.lazy_subcommands:
            return super().get_command(ctx, cmd_name)
        return self._lazy_load(cmd_name)

    def _lazy_load(self, cmd_name):
        """
        Import and memoize the command object registered under `cmd_name`.

        Raises
        ------
        ValueError
            If the imported attribute is not a `click.BaseCommand`.
        """
        if cmd_name not in self._lazy_loaded:
            import_path = self.lazy_subcommands[cmd_name]
            module_name, attr_name = import_path.rsplit(".", 1)
            # Import the module, then pull the command object out of it.
            loaded = getattr(importlib.import_module(module_name), attr_name)
            # Validate the result to make debugging easier. Note that wrapped
            # BaseCommand can be functions.
            if not isinstance(loaded, click.BaseCommand):
                raise ValueError(
                    f"Lazy loading of {import_path} failed by returning "
                    f"a non-command object {type(loaded)}"
                )
            self._lazy_loaded[cmd_name] = loaded
        return self._lazy_loaded[cmd_name]
================================================
FILE: metaflow/client/__init__.py
================================================
# core client classes
from .core import (
namespace,
get_namespace,
default_namespace,
metadata,
get_metadata,
default_metadata,
inspect_spin,
Metaflow,
Flow,
Run,
Step,
Task,
DataArtifact,
)
================================================
FILE: metaflow/client/core.py
================================================
from __future__ import print_function
import json
import os
import tarfile
from collections import namedtuple
from datetime import datetime
from tempfile import TemporaryDirectory
from io import BytesIO
from itertools import chain
from typing import (
Any,
Dict,
FrozenSet,
Iterable,
Iterator,
List,
NamedTuple,
Optional,
TYPE_CHECKING,
Tuple,
)
from metaflow.metaflow_current import current
from metaflow.events import Trigger
from metaflow.exception import (
MetaflowInternalError,
MetaflowInvalidPathspec,
MetaflowNamespaceMismatch,
MetaflowNotFound,
)
from metaflow.includefile import IncludedFile
from metaflow.metaflow_config import DEFAULT_METADATA, MAX_ATTEMPTS
from metaflow.metaflow_environment import MetaflowEnvironment
from metaflow.package import MetaflowPackage
from metaflow.packaging_sys import ContentType
from metaflow.plugins import ENVIRONMENTS, METADATA_PROVIDERS
from metaflow.unbounded_foreach import CONTROL_TASK_TAG
from metaflow.util import cached_property, is_stringish, resolve_identity, to_unicode
from .filecache import FileCache
if TYPE_CHECKING:
from metaflow.metadata_provider import MetadataProvider
try:
# python2
import cPickle as pickle
except: # noqa E722
# python3
import pickle
# populated at the bottom of this file
_CLASSES = {}
Metadata = namedtuple("Metadata", ["name", "value", "created_at", "type", "task"])
filecache = None
current_namespace = False
current_metadata = False
def metadata(ms: str) -> str:
    """
    Switch Metadata provider.

    This call has a global effect. Selecting the local metadata will,
    for example, not allow access to information stored in remote
    metadata providers.

    Note that you don't typically have to call this function directly. Usually
    the metadata provider is set through the Metaflow configuration file. If you
    need to switch between multiple providers, you can use the `METAFLOW_PROFILE`
    environment variable to switch between configurations.

    Parameters
    ----------
    ms : str
        Can be a path (selects local metadata), a URL starting with http (selects
        the service metadata) or an explicit specification <metadata_type>@<info>;
        as an example, you can specify local@<path> or service@<url>.

    Returns
    -------
    str
        The description of the metadata selected (equivalent to the result of
        get_metadata()).
    """
    global current_metadata
    provider, info = _metadata(ms)
    if provider is None:
        # Nothing matched: leave the current provider untouched and tell the
        # user which explicit form to use.
        print(
            "Cannot find a metadata provider -- "
            "try specifying one explicitly using <type>@<info>",
        )
        return get_metadata()
    current_metadata = provider
    if info:
        current_metadata.INFO = info
    return get_metadata()
def get_metadata() -> str:
    """
    Describe the Metadata provider currently in use.

    When no provider has been selected yet (through `metadata`), the default
    one is activated first via `default_metadata()`. This makes the call a
    convenient way to check that your configuration is set up properly. If
    multiple configuration profiles are present, the one selected through the
    `METAFLOW_PROFILE` environment variable is reported.

    Returns
    -------
    str
        Provider specific description of the selected Metadata provider (like
        a URL for remote providers or a local path for local providers).
    """
    uninitialized = current_metadata is False
    if uninitialized:
        default_metadata()
    # Read the global only after the (possible) lazy initialization above.
    provider = current_metadata
    return provider.metadata_str()
def default_metadata() -> str:
    """
    Reset the Metadata provider to its default, i.e. the value that was in
    effect before any `metadata` call.

    Returns
    -------
    str
        The result of get_metadata() after resetting the provider.
    """
    global current_metadata

    # Inside a running flow, follow the provider the flow itself is using.
    if current._metadata_str:
        return metadata(current._metadata_str)

    candidates = [
        provider for provider in METADATA_PROVIDERS if provider.TYPE == DEFAULT_METADATA
    ]
    if not candidates:
        # No configured provider matches: fall back to the local one.
        from metaflow.plugins.metadata_providers import LocalMetadataProvider

        candidates = [LocalMetadataProvider]
    current_metadata = candidates[0]
    return get_metadata()
def namespace(ns: Optional[str]) -> Optional[str]:
    """
    Make `ns` the active namespace.

    The change is global: objects outside the selected namespace become
    inaccessible. Pass None to disable namespace filtering and access all
    objects.

    Parameters
    ----------
    ns : str, optional
        Namespace to switch to or None to ignore namespaces.

    Returns
    -------
    str, optional
        Namespace set (result of get_namespace()).
    """
    global current_namespace

    current_namespace = ns
    return get_namespace()
def get_namespace() -> Optional[str]:
    """
    Return the namespace currently used to filter objects.

    The namespace is a tag associated with all objects in Metaflow.

    Returns
    -------
    str, optional
        The current namespace used to filter objects.
    """
    # The namespace is initialized lazily: `False` is the "never set" sentinel
    # (None is a legitimate value meaning "no filtering").
    never_set = current_namespace is False
    if never_set:
        default_namespace()
    return current_namespace
def default_namespace() -> str:
    """
    Reset the namespace used to filter objects to the default one, i.e. the
    one that was in effect before any `namespace` call.

    The default is whatever `resolve_identity()` returns.

    Returns
    -------
    str
        The result of get_namespace() after the namespace has been reset.
    """
    global current_namespace

    current_namespace = resolve_identity()
    return get_namespace()
def inspect_spin(datastore_root: str = "."):
    """
    Point the client at the spin metadata so that spin steps, tasks and
    artifacts can be inspected.

    Parameters
    ----------
    datastore_root : str, default "."
        The root path to the spin datastore.
    """
    # Equivalent to calling metadata("spin@<datastore_root>").
    metadata(f"spin@{datastore_root}")
MetaflowArtifacts = NamedTuple
class MetaflowObject(object):
"""
Base class for all Metaflow objects.
Creates a new object of a specific type (Flow, Run, Step, Task, DataArtifact) given
a path to it (its `pathspec`).
Accessing Metaflow objects is done through one of two methods:
- either by directly instantiating it with this class
- or by accessing it through its parent (iterating over
all children or accessing directly using the [] operator)
With this class, you can:
- Get a `Flow`; use `Flow('FlowName')`.
- Get a `Run` of a flow; use `Run('FlowName/RunID')`.
- Get a `Step` of a run; use `Step('FlowName/RunID/StepName')`.
- Get a `Task` of a step, use `Task('FlowName/RunID/StepName/TaskID')`
- Get a `DataArtifact` of a task; use
`DataArtifact('FlowName/RunID/StepName/TaskID/ArtifactName')`.
Attributes
----------
tags : FrozenSet[str]
Tags associated with the run this object belongs to (user and system tags).
user_tags: FrozenSet[str]
User tags associated with the run this object belongs to.
system_tags: FrozenSet[str]
System tags associated with the run this object belongs to.
created_at : datetime
Date and time this object was first created.
parent : MetaflowObject
Parent of this object. The parent of a `Run` is a `Flow` for example
pathspec : str
Pathspec of this object (for example: 'FlowName/RunID' for a `Run`)
path_components : List[str]
Components of the pathspec
origin_pathspec : str, optional
Pathspec of the original object this object was cloned from (in the case of a resume).
None if not applicable.
"""
_NAME = "base"
_CHILD_CLASS = None
_PARENT_CLASS = None
def __init__(
    self,
    pathspec: Optional[str] = None,
    attempt: Optional[int] = None,
    _object: Optional["MetaflowObject"] = None,
    _parent: Optional["MetaflowObject"] = None,
    _namespace_check: bool = True,
    _metaflow: Optional["Metaflow"] = None,
    _current_namespace: Optional[str] = None,
    _current_metadata: Optional[str] = None,
):
    """
    Build the object either from a `pathspec` (the record is fetched from the
    metadata provider) or from an already fetched record `_object`.

    Parameters
    ----------
    pathspec : str, optional
        Path identifying the object, e.g. 'FlowName/RunID'.
    attempt : int, optional
        Attempt to look at; only valid for Task and DataArtifact.
    _object, _parent, _namespace_check, _metaflow, _current_namespace, _current_metadata
        Internal arguments used when objects are created by their parent or
        re-created while unpickling.

    Raises
    ------
    MetaflowNotFound
        If `attempt` is invalid or the object does not exist.
    MetaflowInvalidPathspec
        If `pathspec` does not have the number of components this type expects.
    MetaflowNamespaceMismatch
        If the namespace check is enabled and the object is not in the namespace.
    """
    # the default namespace is activated lazily at the first
    # get_namespace(). The other option of activating
    # the namespace at the import time is problematic, since there
    # may be other modules that alter environment variables etc.
    # which may affect the namespace setting.
    #
    # Re-use the Metaflow object handed down by the parent so that children
    # talk to the same metadata provider as the object they were listed from.
    # An explicit _current_metadata (used when unpickling) still takes
    # precedence. Previously this was `Metaflow(...) or _metaflow`, which
    # always built a new Metaflow and silently ignored `_metaflow`.
    if _metaflow is not None and _current_metadata is None:
        self._metaflow = _metaflow
    else:
        self._metaflow = Metaflow(_current_metadata)
    self._parent = _parent
    self._path_components = None
    self._attempt = attempt
    self._current_namespace = _current_namespace or get_namespace()
    self._namespace_check = _namespace_check
    # If the current namespace is None, we disable checking for namespace for this
    # and all children objects. Not setting namespace_check to False has the consequence
    # of preventing access to children objects after the namespace changes
    if self._current_namespace is None:
        self._namespace_check = False

    if self._attempt is not None:
        if self._NAME not in ["task", "artifact"]:
            raise MetaflowNotFound(
                "Attempts can only be specified for Task or DataArtifact"
            )
        try:
            self._attempt = int(self._attempt)
        except ValueError:
            raise MetaflowNotFound("Attempt can only be an integer")

        if self._attempt < 0:
            raise MetaflowNotFound("Attempt can only be non-negative")
        elif self._attempt >= MAX_ATTEMPTS:
            raise MetaflowNotFound(
                "Attempt can only be smaller than %d" % MAX_ATTEMPTS
            )
        # NOTE: It is possible that no attempt exists, but we can't
        # distinguish between "attempt will happen" and "no such
        # attempt exists".

    if pathspec and _object is None:
        # Created directly by the user: validate the pathspec shape for this
        # type, then fetch the record.
        ids = pathspec.split("/")

        if self._NAME == "flow" and len(ids) != 1:
            raise MetaflowInvalidPathspec("Expects Flow('FlowName')")
        elif self._NAME == "run" and len(ids) != 2:
            raise MetaflowInvalidPathspec("Expects Run('FlowName/RunID')")
        elif self._NAME == "step" and len(ids) != 3:
            raise MetaflowInvalidPathspec("Expects Step('FlowName/RunID/StepName')")
        elif self._NAME == "task" and len(ids) != 4:
            raise MetaflowInvalidPathspec(
                "Expects Task('FlowName/RunID/StepName/TaskID')"
            )
        elif self._NAME == "artifact" and len(ids) != 5:
            raise MetaflowInvalidPathspec(
                "Expects DataArtifact('FlowName/RunID/StepName/TaskID/ArtifactName')"
            )

        self.id = ids[-1]
        self._pathspec = pathspec
        self._object = self._get_object(*ids)
    else:
        # Created from an existing record (typically by the parent object):
        # the id is read from the record, using the key matching the type.
        self._object = _object
        self._pathspec = pathspec

        if self._NAME in ("flow", "task"):
            self.id = str(self._object[self._NAME + "_id"])
        elif self._NAME == "run":
            self.id = str(self._object["run_number"])
        elif self._NAME == "step":
            self.id = str(self._object["step_name"])
        elif self._NAME == "artifact":
            self.id = str(self._object["name"])
        else:
            raise MetaflowInternalError(msg="Unknown type: %s" % self._NAME)

    # ts_epoch is divided by 1000 before conversion, i.e. it is treated as
    # milliseconds.
    self._created_at = datetime.fromtimestamp(self._object["ts_epoch"] / 1000.0)

    self._tags = frozenset(
        chain(self._object.get("system_tags") or [], self._object.get("tags") or [])
    )
    self._user_tags = frozenset(self._object.get("tags") or [])
    self._system_tags = frozenset(self._object.get("system_tags") or [])

    if self._namespace_check and not self._is_in_namespace(self._current_namespace):
        raise MetaflowNamespaceMismatch(self._current_namespace)
def _get_object(self, *path_components):
result = self._metaflow.metadata.get_object(
self._NAME, "self", None, self._attempt, *path_components
)
if not result:
raise MetaflowNotFound("%s does not exist" % self)
return result
def __iter__(self) -> Iterator["MetaflowObject"]:
    """
    Iterate over all child objects of this object if any.

    Note that only children present in the current namespace are returned if and
    only if _namespace_check is set.

    Children are returned most recently created first.

    Yields
    ------
    MetaflowObject
        Children of this object
    """
    query_filter = {}

    # skip namespace filtering if _namespace_check is unset.
    if self._namespace_check and self._current_namespace:
        query_filter = {"any_tags": self._current_namespace}

    child_cls = _CLASSES[self._CHILD_CLASS]
    records = (
        self._metaflow.metadata.get_object(
            self._NAME,
            child_cls._NAME,
            query_filter,
            self._attempt,
            *self.path_components,
        )
        or []
    )

    # Children inherit our namespace only when we are checking it ourselves.
    child_namespace = self._current_namespace if self._namespace_check else None
    children = (
        child_cls(
            attempt=self._attempt,
            _object=record,
            _parent=self,
            _metaflow=self._metaflow,
            _namespace_check=self._namespace_check,
            _current_namespace=child_namespace,
        )
        for record in records
    )
    # A filter object is always truthy, so the former `if children: ... else:`
    # never reached its else branch; sorting an empty iterable already yields
    # an empty iterator.
    visible = filter(self._iter_filter, children)
    return iter(sorted(visible, reverse=True, key=lambda child: child.created_at))
def _iter_filter(self, x):
return True
def _filtered_children(self, *tags):
"""
Returns an iterator over all children.
If tags are specified, only children associated with all specified tags
are returned.
"""
for child in self:
if all(tag in child.tags for tag in tags):
yield child
def _ipython_key_completions_(self):
"""Returns available options for ipython auto-complete."""
return [child.id for child in self._filtered_children()]
@classmethod
def _url_token(cls):
return "%ss" % cls._NAME
def is_in_namespace(self) -> bool:
    """
    Returns whether this object is in the current namespace.

    If the current namespace is None, this will always return True.

    Returns
    -------
    bool
        Whether or not the object is in the current namespace
    """
    # Go through get_namespace() rather than the raw module global: the global
    # starts out as the `False` sentinel until the namespace is lazily
    # initialized, and `False` would never match any tag.
    return self._is_in_namespace(get_namespace())
def _is_in_namespace(self, ns: str) -> bool:
"""
Returns whether this object is in namespace passed in.
If the current namespace is None, this will always return True.
Parameters
----------
ns : str
Namespace to check if the object is in.
Returns
-------
bool
Whether or not the object is in the current namespace
"""
if self._NAME == "flow":
return any(True for _ in self)
else:
return ns is None or ns in self._tags
def __str__(self):
if self._attempt is not None:
return "%s('%s', attempt=%d)" % (
self.__class__.__name__,
self.pathspec,
self._attempt,
)
return "%s('%s')" % (self.__class__.__name__, self.pathspec)
def __repr__(self):
return str(self)
def _get_child(self, id):
    """
    Fetch the raw record of the child named `id` from the metadata provider.

    The lookup path is this object's path components followed by `id`.
    """
    child_path = list(self.path_components)
    child_path.append(id)
    child_type = _CLASSES[self._CHILD_CLASS]._NAME
    return self._metaflow.metadata.get_object(
        child_type, "self", None, self._attempt, *child_path
    )
def __getitem__(self, id: str) -> "MetaflowObject":
"""
Returns the child object named 'id'.
Parameters
----------
id : str
Name of the child object
Returns
-------
MetaflowObject
Child object
Raises
------
KeyError
If the name does not identify a valid child object
"""
obj = self._get_child(id)
if obj:
return _CLASSES[self._CHILD_CLASS](
attempt=self._attempt,
_object=obj,
_parent=self,
_metaflow=self._metaflow,
_namespace_check=self._namespace_check,
_current_namespace=(
self._current_namespace if self._namespace_check else None
),
)
else:
raise KeyError(id)
def __contains__(self, id: str):
"""
Tests whether a child named 'id' exists.
Parameters
----------
id : str
Name of the child object
Returns
-------
bool
True if the child exists or False otherwise
"""
return bool(self._get_child(id))
def _unpickle_284(self, data):
if len(data) != 3:
raise MetaflowInternalError(
"Unexpected size of array: {}".format(len(data))
)
pathspec, attempt, namespace_check = data
self.__init__(
pathspec=pathspec, attempt=attempt, _namespace_check=namespace_check
)
def _unpickle_2124(self, data):
if len(data) != 4:
raise MetaflowInternalError(
"Unexpected size of array: {}".format(len(data))
)
pathspec, attempt, ns, namespace_check = data
self.__init__(
pathspec=pathspec,
attempt=attempt,
_namespace_check=namespace_check,
_current_namespace=ns,
)
def _unpickle_21227(self, data):
if len(data) != 5:
raise MetaflowInternalError(
"Unexpected size of array: {}".format(len(data))
)
pathspec, attempt, md, ns, namespace_check = data
self.__init__(
pathspec=pathspec,
attempt=attempt,
_namespace_check=namespace_check,
_current_metadata=md,
_current_namespace=ns,
)
_UNPICKLE_FUNC = {
"2.8.4": _unpickle_284,
"2.12.4": _unpickle_2124,
"2.12.27": _unpickle_21227,
}
def __setstate__(self, state):
"""
This function is used during the unpickling operation.
More info here https://docs.python.org/3/library/pickle.html#object.__setstate__
"""
if "version" in state and "data" in state:
version = state["version"]
if version not in self._UNPICKLE_FUNC:
# this happens when an object pickled using a newer version of Metaflow is
# being un-pickled using an older version of Metaflow
raise MetaflowInternalError(
"Unpickling this object requires a Metaflow version greater than or equal to {}".format(
version
)
)
self._UNPICKLE_FUNC[version](self, state["data"])
else:
# For backward compatibility: handles pickled objects that were serialized without a __getstate__ override
# We set namespace_check to False if it doesn't exist so that the user can
# continue accessing this object once unpickled.
self.__init__(
pathspec=state.get("_pathspec", None),
attempt=state.get("_attempt", None),
_namespace_check=state.get("_namespace_check", False),
_current_namespace=None,
)
def __getstate__(self):
"""
This function is used during the pickling operation.
More info here https://docs.python.org/3/library/pickle.html#object.__getstate__
This function is not forward compatible i.e., if this object (or any of the objects deriving
from this object) are pickled (serialized) in a later version of Metaflow, it may not be possible
to unpickle (deserialize) them in a previous version of Metaflow.
"""
# Note that we now record the namespace at the time of the object creation so
# we don't need to force namespace_check to be False and can properly continue
# checking for the namespace even after unpickling since we will know which
# namespace to check.
return {
"version": "2.12.27",
"data": [
self.pathspec,
self._attempt,
self._metaflow.metadata.metadata_str(),
self._current_namespace,
self._namespace_check,
],
}
@property
def tags(self) -> FrozenSet[str]:
    """
    All tags of this object, user defined and system defined together.

    Returns
    -------
    FrozenSet[str]
        Tags associated with the object
    """
    all_tags = self._tags
    return all_tags
@property
def system_tags(self) -> FrozenSet[str]:
    """
    Tags of this object that were defined by the system.

    Returns
    -------
    FrozenSet[str]
        System tags associated with the object
    """
    sys_tags = self._system_tags
    return sys_tags
@property
def user_tags(self) -> FrozenSet[str]:
    """
    Tags of this object that were defined by the user.

    Returns
    -------
    FrozenSet[str]
        User tags associated with the object
    """
    usr_tags = self._user_tags
    return usr_tags
@property
def created_at(self) -> datetime:
    """
    When this object was created.

    This is the time the object's existence was first recorded, which
    typically means right before any code is run.

    Returns
    -------
    datetime
        Date time of this object's creation.
    """
    stamp = self._created_at
    return stamp
@property
def origin_pathspec(self) -> Optional[str]:
    """
    The pathspec of the object from which the current object was cloned.

    For a run, the origin is derived from the "origin-run-id" metadata of the
    first step returned by `steps()`. For other objects it is built from the
    parent's origin pathspec; tasks additionally need an "origin-task-id"
    metadata entry.

    Returns
    -------
    str, optional
        pathspec of the origin object from which current object was cloned,
        or None if not applicable.
    """
    origin_pathspec = None
    if self._NAME == "run":
        # A run without any step must yield None rather than leak a
        # StopIteration out of this property.
        latest_step = next(self.steps(), None)
        if latest_step and latest_step.task:
            # If we had a step
            task = latest_step.task
            origin_run_id = [
                m.value for m in task.metadata if m.name == "origin-run-id"
            ]
            if origin_run_id:
                origin_pathspec = "%s/%s" % (self.parent.id, origin_run_id[0])
    else:
        parent_pathspec = self.parent.origin_pathspec if self.parent else None
        if parent_pathspec:
            my_id = self.id
            origin_task_id = None
            if self._NAME == "task":
                # Tasks get a new id when cloned: use the recorded origin id,
                # or give up if there is none.
                origin_task_id = [
                    m.value for m in self.metadata if m.name == "origin-task-id"
                ]
                if origin_task_id:
                    my_id = origin_task_id[0]
                else:
                    my_id = None
            if my_id is not None:
                origin_pathspec = "%s/%s" % (parent_pathspec, my_id)
    return origin_pathspec
@property
def parent(self) -> Optional["MetaflowObject"]:
    """
    The object one level up in the hierarchy, or None for a flow.

    Returns
    -------
    MetaflowObject, optional
        The parent of this object
    """
    if self._NAME == "flow":
        return None
    if self._parent is not None:
        return self._parent

    # Not cached yet: derive the parent's pathspec by dropping our last
    # component, build the parent and remember it.
    own_pathspec = self.pathspec
    parent_pathspec = own_pathspec[: own_pathspec.rfind("/")]
    # Only artifacts and tasks have attempts right now, so the attempt is
    # forwarded only when we are an artifact (whose parent is a task).
    attempt_to_pass = None
    if self._NAME == "artifact":
        attempt_to_pass = self._attempt
    # We can skip the namespace check because if self._NAME = 'run',
    # the parent object is guaranteed to be in namespace.
    # Otherwise the check is moot for Flow since parent is singular.
    parent_cls = _CLASSES[self._PARENT_CLASS]
    self._parent = parent_cls(
        parent_pathspec, attempt=attempt_to_pass, _namespace_check=False
    )
    return self._parent
@property
def pathspec(self) -> str:
    """
    Returns a string representation uniquely identifying this object.

    The string is the same as the one you would pass into the constructor
    to build this object except if you are looking for a specific attempt of
    a task or a data artifact (in which case you need to add `attempt=<attempt>`
    in the constructor).

    Returns
    -------
    str
        Unique representation of this object
    """
    if self._pathspec is None:
        if self.parent is None:
            self._pathspec = self.id
        else:
            # Pathspecs are always "/"-separated (they are split on "/"
            # elsewhere), so join explicitly instead of with os.path.join,
            # which would use "\\" on Windows.
            self._pathspec = "%s/%s" % (self.parent.pathspec, self.id)
    return self._pathspec
@property
def path_components(self) -> List[str]:
    """
    The pathspec broken into its "/"-separated parts.

    The split is computed once and cached; every call hands back a fresh
    copy of the cached list.

    Returns
    -------
    List[str]
        Individual components of the pathspec
    """
    cached = self._path_components
    if cached is None:
        cached = self._path_components = self.pathspec.split("/")
    return list(cached)
class MetaflowCode(object):
    """
    Snapshot of the code used to execute this `Run`. Instantiate the object through
    `Run(...).code` (if any step is executed remotely) or `Task(...).code` for an
    individual task. The code package is the same for all steps of a `Run`.

    `MetaflowCode` includes a package of the user-defined `FlowSpec` class and supporting
    files, as well as a snapshot of the Metaflow library itself.

    Currently, `MetaflowCode` objects are stored only for `Run`s that have at least one `Step`
    executing outside the user's local environment.

    The `TarFile` for the `Run` is given by `Run(...).code.tarball`

    Attributes
    ----------
    path : str
        Location (in the datastore provider) of the code package.
    info : Dict[str, str]
        Dictionary of information related to this code-package.
    flowspec : str
        Source code of the file containing the `FlowSpec` in this code package.
    tarball : TarFile
        Python standard library `tarfile.TarFile` archive containing all the code.
    """

    def __init__(self, flow_name: str, code_package: str):
        """
        Download the code package and read its info and flowspec.

        Parameters
        ----------
        flow_name : str
            Name of the flow the package belongs to.
        code_package : str
            JSON document with at least "location", "ds_type" and "sha"; an
            optional "metadata" entry describes the archive format.

        Raises
        ------
        MetaflowInternalError
            If no info could be read from the package.
        """
        global filecache

        self._flow_name = flow_name
        info = json.loads(code_package)
        self._path = info["location"]
        self._ds_type = info["ds_type"]
        self._sha = info["sha"]
        # Packages without a "metadata" entry are treated as version-0 tgz.
        self._code_metadata = info.get(
            "metadata",
            '{"version": 0, "archive_format": "tgz", "mfcontent_version": 0}',
        )

        self._backend = MetaflowPackage.get_backend(self._code_metadata)

        if filecache is None:
            filecache = FileCache()
        _, blobdata = filecache.get_data(
            self._ds_type, self._flow_name, self._path, self._sha
        )
        self._code_obj = BytesIO(blobdata)
        self._info = MetaflowPackage.cls_get_info(self._code_metadata, self._code_obj)
        # Rewind after each read: the buffer is shared by every accessor.
        self._code_obj.seek(0)
        if self._info:
            self._flowspec = MetaflowPackage.cls_get_content(
                self._code_metadata, self._code_obj, self._info["script"]
            )
            self._code_obj.seek(0)
        else:
            raise MetaflowInternalError("Code package metadata is invalid.")
        self._tarball = None

    @property
    def path(self) -> str:
        """
        Location (in the datastore provider) of the code package.

        Returns
        -------
        str
            Full path of the code package
        """
        return self._path

    @property
    def info(self) -> Dict[str, str]:
        """
        Metadata associated with the code package.

        Returns
        -------
        Dict[str, str]
            Dictionary of metadata. Keys and values are strings
        """
        return self._info

    @property
    def flowspec(self) -> str:
        """
        Source code of the Python file containing the FlowSpec.

        Returns
        -------
        str
            Content of the Python file
        """
        return self._flowspec

    @property
    def tarball(self) -> tarfile.TarFile:
        """
        TarFile for this code package.

        Returns
        -------
        TarFile
            TarFile for everything in this code package

        Raises
        ------
        RuntimeError
            If the package's archive format is not "tgz".
        """
        # We only return one tarball because the different TarFile objects share
        # a common bytes buffer (self._code_obj).
        if self._tarball is not None:
            return self._tarball
        if self._backend.type == "tgz":
            self._tarball = self._backend.cls_open(self._code_obj)
            return self._tarball
        raise RuntimeError("Archive is not a tarball")

    def extract(self) -> TemporaryDirectory:
        """
        Extracts the code package to a temporary directory.

        This creates a temporary directory containing all user code
        files from the code package. The temporary directory is
        automatically deleted when the returned TemporaryDirectory
        object is garbage collected or when its cleanup() is called.

        To preserve the contents to a permanent location, use
        os.replace() which performs a zero-copy move on the same
        filesystem:

        ```python
        with task.code.extract() as tmp_dir:
            # Move contents to permanent location
            for item in os.listdir(tmp_dir):
                src = os.path.join(tmp_dir, item)
                dst = os.path.join('/path/to/permanent/dir', item)
                os.makedirs(os.path.dirname(dst), exist_ok=True)
                os.replace(src, dst)  # Atomic move operation
        ```

        Returns
        -------
        TemporaryDirectory
            A temporary directory containing the extracted code files.
            The directory and its contents are automatically deleted when
            this object is garbage collected.
        """
        tmp = TemporaryDirectory()
        # We save the position we are in _code_obj -- in case tarball is using it at
        # the same time -- so we can reset it to not perturb tarball.
        pos = self._code_obj.tell()
        self._code_obj.seek(0)
        MetaflowPackage.cls_extract_into(
            self._code_metadata, self._code_obj, tmp.name, ContentType.USER_CONTENT
        )
        self._code_obj.seek(pos)
        return tmp

    @property
    def script_name(self) -> str:
        """
        Returns the filename of the Python script containing the FlowSpec.

        This is the main Python file that was used to execute the flow. For example,
        if your flow is defined in 'myflow.py', this property will return 'myflow.py'.

        Returns
        -------
        str
            Name of the Python file containing the FlowSpec
        """
        return self._info["script"]

    def __str__(self):
        # The template must contain the %s placeholder: formatting an empty
        # string with an argument raises TypeError.
        return "<MetaflowCode: %s>" % self._info["script"]
class DataArtifact(MetaflowObject):
    """
    One data artifact together with its metadata. This is the leaf of the
    object hierarchy: it has no children.

    Attributes
    ----------
    data : object
        The data contained in this artifact, that is, the object produced during
        execution of this run.
    sha : string
        A unique ID of this artifact.
    finished_at : datetime
        Corresponds roughly to the `Task.finished_at` time of the parent `Task`.
        An alias for `DataArtifact.created_at`.
    """

    _NAME = "artifact"
    _PARENT_CLASS = "task"
    _CHILD_CLASS = None

    @property
    def data(self) -> Any:
        """
        The artifact's content, loaded through the file cache.

        Returns
        -------
        object
            Object contained in this artifact
        """
        global filecache

        record = self._object
        ds_type = record["ds_type"]
        location = record["location"]
        components = self.path_components

        if filecache is None:
            # TODO: Pass proper environment to properly extract artifacts
            filecache = FileCache()

        # "create" the metadata information that the datastore needs
        # to access this object.
        # TODO: We can store more information in the metadata, particularly
        #       to determine if we need an environment to unpickle the artifact.
        name = record["name"]
        meta = {
            "objects": {name: record["sha"]},
            "info": {
                name: {
                    "size": 0,
                    "type": None,
                    "encoding": record["content_type"],
                }
            },
        }
        root_marker = ":root:"
        if location.startswith(root_marker):
            # Strip the marker; what remains is handed to the file cache.
            loaded = filecache.get_artifact(
                ds_type, location[len(root_marker) :], meta, *components
            )
        else:
            # Older artifacts have a location information which we can use.
            loaded = filecache.get_artifact_by_location(
                ds_type, location, meta, *components
            )

        # Included files are decoded before being returned.
        return loaded.decode(self.id) if isinstance(loaded, IncludedFile) else loaded

    @property
    def size(self) -> int:
        """
        Size (in bytes) of the pickled object representing this DataArtifact.

        Returns
        -------
        int
            size of the pickled representation of data artifact (in bytes)
        """
        global filecache

        record = self._object
        ds_type = record["ds_type"]
        location = record["location"]
        components = self.path_components

        if filecache is None:
            # TODO: Pass proper environment to properly extract artifacts
            filecache = FileCache()

        root_marker = ":root:"
        if not location.startswith(root_marker):
            return filecache.get_artifact_size_by_location(
                ds_type, location, self._attempt, *components
            )
        return filecache.get_artifact_size(
            ds_type, location[len(root_marker) :], self._attempt, *components
        )

    # TODO add
    # @property
    # def type(self)

    @property
    def sha(self) -> str:
        """
        Unique identifier for this artifact.

        This is a unique hash of the artifact (historically SHA1 hash)

        Returns
        -------
        str
            Hash of this artifact
        """
        return self._object["sha"]

    @property
    def finished_at(self) -> datetime:
        """
        Creation time for this artifact; an alias for created_at.

        Returns
        -------
        datetime
            Creation time
        """
        return self.created_at

    def __getstate__(self):
        # Pickling is entirely delegated to MetaflowObject.
        return super().__getstate__()

    def __setstate__(self, state):
        # Unpickling is entirely delegated to MetaflowObject.
        super().__setstate__(state)
class MetaflowData(object):
    """
    Container of data artifacts produced by a `Task`. This object is
    instantiated through `Task.data`.

    `MetaflowData` allows results to be retrieved by their name
    through a convenient dot notation:

    ```python
    Task(...).data.my_object
    ```

    You can also test the existence of an object

    ```python
    if 'my_object' in Task(...).data:
        print('my_object found')
    ```

    Note that this container relies on the local cache to load all data
    artifacts. If your `Task` contains a lot of data, a more efficient
    approach is to load artifacts individually like so

    ```
    Task(...)['my_object'].data
    ```
    """

    def __init__(self, artifacts: Iterable["DataArtifact"]):
        # Index the artifacts by id so they can be looked up by name.
        self._artifacts = dict((art.id, art) for art in artifacts)

    def __getattr__(self, name: str):
        """
        Return the data of the artifact called `name`.

        Raises
        ------
        AttributeError
            If no artifact has that name.
        """
        # __getattr__ is only invoked when normal lookup fails. Reading
        # `self._artifacts` here while it is not set yet (e.g. on an instance
        # created without __init__, as copy/pickle do) would re-enter
        # __getattr__ forever, so go through __dict__ instead.
        artifacts = self.__dict__.get("_artifacts", {})
        if name not in artifacts:
            raise AttributeError(name)
        return artifacts[name].data

    def __contains__(self, var):
        return var in self._artifacts

    def __str__(self):
        # The template must contain the %s placeholder: formatting an empty
        # string with an argument raises TypeError.
        return "<MetaflowData: %s>" % ", ".join(self._artifacts)

    def __repr__(self):
        return str(self)
class Task(MetaflowObject):
"""
A `Task` represents an execution of a `Step`.
It contains all `DataArtifact` objects produced by the task as
well as metadata related to execution.
Note that the `@retry` decorator may cause multiple attempts of
the task to be present. Usually you want the latest attempt, which
is what instantiating a `Task` object returns by default. If
you need to e.g. retrieve logs from a failed attempt, you can
explicitly get information about a specific attempt by using the
following syntax when creating a task:
`Task('flow/run/step/task', attempt=<attempt>)`
where `attempt=0` corresponds to the first attempt etc.
Attributes
----------
metadata : List[Metadata]
List of all metadata events associated with the task.
metadata_dict : Dict[str, str]
A condensed version of `metadata`: A dictionary where keys
are names of metadata events and values the latest corresponding event.
data : MetaflowData
Container of all data artifacts produced by this task. Note that this
call downloads all data locally, so it can be slower than accessing
artifacts individually. See `MetaflowData` for more information.
artifacts : MetaflowArtifacts
Container of `DataArtifact` objects produced by this task.
successful : bool
True if the task completed successfully.
finished : bool
True if the task completed.
exception : object
Exception raised by this task if there was one.
finished_at : datetime
Time this task finished.
runtime_name : str
Runtime this task was executed on.
stdout : str
Standard output for the task execution.
stderr : str
Standard error output for the task execution.
code : MetaflowCode
Code package for this task (if present). See `MetaflowCode`.
environment_info : Dict[str, str]
Information about the execution environment.
"""
_NAME = "task"
_PARENT_CLASS = "step"
_CHILD_CLASS = "artifact"
def _iter_filter(self, x):
# exclude private data artifacts
return x.id[0] != "_"
def _get_matching_pathspecs(self, steps, metadata_key, metadata_pattern):
"""
Yield pathspecs of tasks from specified steps that match a given metadata pattern.
Parameters
----------
steps : List[str]
List of Step objects to search for tasks.
metadata_key : str
Metadata key to filter tasks on (e.g., 'foreach-execution-path').
metadata_pattern : str
Regular expression pattern to match against the metadata value.
Yields
------
str
Pathspec of each task whose metadata value for the specified key matches the pattern.
"""
flow_id, run_id, _, _ = self.path_components
for step in steps:
task_pathspecs = self._metaflow.metadata.filter_tasks_by_metadata(
flow_id, run_id, step, metadata_key, metadata_pattern
)
for task_pathspec in task_pathspecs:
yield task_pathspec
@staticmethod
def _get_previous_steps(graph_info, step_name):
# Get the parent steps
steps = []
for node_name, attributes in graph_info["steps"].items():
if step_name in attributes["next"]:
steps.append(node_name)
return steps
@property
def parent_task_pathspecs(self) -> Iterator[str]:
"""
Yields pathspecs of all parent tasks of the current task.
Yields
------
str
Pathspec of the parent task of the current task
"""
_, _, step_name, _ = self.path_components
metadata_dict = self.metadata_dict
graph_info = self["_graph_info"].data
# Get the parent steps
steps = self._get_previous_steps(graph_info, step_name)
node_type = graph_info["steps"][step_name]["type"]
metadata_key = "foreach-execution-path"
current_path = metadata_dict.get(metadata_key)
if len(steps) > 1:
# Static join - use exact path matching
pattern = current_path or ".*"
else:
if not steps:
return # No parent steps, yield nothing
if not current_path:
# Current task is not part of a foreach
# Pattern: ".*"
pattern = ".*"
else:
current_depth = len(current_path.split(","))
if node_type == "join":
# Foreach join
# (Current task, "A:10,B:13") and (Parent task, "A:10,B:13,C:21")
# Pattern: "A:10,B:13,.*"
pattern = f"{current_path},.*"
else:
# Foreach split or linear step
# Pattern: "A:10,B:13"
parent_step_type = graph_info["steps"][steps[0]]["type"]
target_depth = current_depth
if (
parent_step_type == "split-foreach"
or parent_step_type == "split-parallel"
) and current_depth == 1:
# (Current task, "A:10") and (Parent task, "")
pattern = ".*"
else:
# (Current task, "A:10,B:13,C:21") and (Parent task, "A:10,B:13")
# (Current task, "A:10,B:13") and (Parent task, "A:10,B:13")
if (
parent_step_type == "split-foreach"
or parent_step_type == "split-parallel"
):
target_depth = current_depth - 1
pattern = ",".join(current_path.split(",")[:target_depth])
for pathspec in self._get_matching_pathspecs(steps, metadata_key, pattern):
yield pathspec
@property
def child_task_pathspecs(self) -> Iterator[str]:
"""
Yields pathspecs of all child tasks of the current task.
Yields
------
str
Pathspec of the child task of the current task
"""
flow_id, run_id, step_name, _ = self.path_components
metadata_dict = self.metadata_dict
graph_info = self["_graph_info"].data
# Get the child steps
steps = graph_info["steps"][step_name]["next"]
node_type = graph_info["steps"][step_name]["type"]
metadata_key = "foreach-execution-path"
current_path = metadata_dict.get(metadata_key)
if len(steps) > 1:
# Static split - use exact path matching
pattern = current_path or ".*"
else:
if not steps:
return # No child steps, yield nothing
if not current_path:
# Current task is not part of a foreach
# Pattern: ".*"
pattern = ".*"
else:
current_depth = len(current_path.split(","))
if node_type == "split-foreach" or node_type == "split-parallel":
# Foreach split
# (Current task, "A:10,B:13") and (Child task, "A:10,B:13,C:21")
# Pattern: "A:10,B:13,.*"
pattern = f"{current_path},.*"
else:
# Foreach join or linear step
# Pattern: "A:10,B:13"
child_step_type = graph_info["steps"][steps[0]]["type"]
# We need to know if the child step is a foreach join or a static join
child_step_prev_steps = self._get_previous_steps(
graph_info, steps[0]
)
if len(child_step_prev_steps) > 1:
child_step_type = "static-join"
target_depth = current_depth
if child_step_type == "join" and current_depth == 1:
# (Current task, "A:10") and (Child task, "")
pattern = ".*"
else:
# (Current task, "A:10,B:13,C:21") and (Child task, "A:10,B:13")
# (Current task, "A:10,B:13") and (Child task, "A:10,B:13")
if child_step_type == "join":
target_depth = current_depth - 1
pattern = ",".join(current_path.split(",")[:target_depth])
for pathspec in self._get_matching_pathspecs(steps, metadata_key, pattern):
yield pathspec
@property
def parent_tasks(self) -> Iterator["Task"]:
"""
Yields all parent tasks of the current task if one exists.
Yields
------
Task
Parent task of the current task
"""
parent_task_pathspecs = self.parent_task_pathspecs
for pathspec in parent_task_pathspecs:
yield Task(pathspec=pathspec, _namespace_check=False)
@property
def child_tasks(self) -> Iterator["Task"]:
"""
Yields all child tasks of the current task if one exists.
Yields
------
Task
Child task of the current task
"""
for pathspec in self.child_task_pathspecs:
yield Task(pathspec=pathspec, _namespace_check=False)
@property
def metadata(self) -> List[Metadata]:
"""
Metadata events produced by this task across all attempts of the task
*except* if you selected a specific task attempt.
Note that Metadata is different from tags.
Returns
-------
List[Metadata]
Metadata produced by this task
"""
all_metadata = self._metaflow.metadata.get_object(
self._NAME, "metadata", None, self._attempt, *self.path_components
)
all_metadata = all_metadata if all_metadata else []
# For "clones" (ie: they have an origin-run-id AND a origin-task-id), we
# copy a set of metadata from the original task. This is needed to make things
# like logs work (which rely on having proper values for ds-root for example)
origin_run_id = None
origin_task_id = None
result = []
existing_keys = []
for obj in all_metadata:
result.append(
Metadata(
name=obj.get("field_name"),
value=obj.get("value"),
created_at=obj.get("ts_epoch"),
type=obj.get("type"),
task=self,
)
)
existing_keys.append(obj.get("field_name"))
if obj.get("field_name") == "origin-run-id":
origin_run_id = obj.get("value")
elif obj.get("field_name") == "origin-task-id":
origin_task_id = obj.get("value")
if origin_task_id:
# This is a "cloned" task. We consider that it has the same
# metadata as the last attempt of the cloned task.
origin_obj_pathcomponents = self.path_components
origin_obj_pathcomponents[1] = origin_run_id
origin_obj_pathcomponents[3] = origin_task_id
origin_task = Task(
"/".join(origin_obj_pathcomponents), _namespace_check=False
)
latest_metadata = {
m.name: m
for m in sorted(origin_task.metadata, key=lambda m: m.created_at)
}
# We point to ourselves in the Metadata object
for v in latest_metadata.values():
if v.name in existing_keys:
continue
result.append(
Metadata(
name=v.name,
value=v.value,
created_at=v.created_at,
type=v.type,
task=self,
)
)
return result
@property
def metadata_dict(self) -> Dict[str, str]:
"""
Dictionary mapping metadata names (keys) and their associated values.
Note that unlike the metadata() method, this call will only return the latest
metadata for a given name. For example, if a task executes multiple times (retries),
the same metadata name will be generated multiple times (one for each execution of the
task). The metadata() method returns all those metadata elements whereas this call will
return the metadata associated with the latest execution of the task.
Returns
-------
Dict[str, str]
Dictionary mapping metadata name with value
"""
# use the newest version of each key, hence sorting
return {
m.name: m.value for m in sorted(self.metadata, key=lambda m: m.created_at)
}
@property
def index(self) -> Optional[int]:
"""
Returns the index of the innermost foreach loop if this task is run inside at least
one foreach.
The index is what distinguishes the various tasks inside a given step.
This call returns None if this task was not run in a foreach loop.
Returns
-------
int, optional
Index in the innermost loop for this task
"""
try:
return self["_foreach_stack"].data[-1].index
except (KeyError, IndexError):
return None
@property
def data(self) -> MetaflowData:
"""
Returns a container of data artifacts produced by this task.
You can access data produced by this task as follows:
```
print(task.data.my_var)
```
Returns
-------
MetaflowData
Container of all artifacts produced by this task
"""
return MetaflowData(self)
@property
def artifacts(self) -> MetaflowArtifacts:
"""
Returns a container of DataArtifacts produced by this task.
You can access each DataArtifact by name like so:
```
print(task.artifacts.my_var)
```
This method differs from data() because it returns DataArtifact objects
(which contain additional metadata) as opposed to just the data.
Returns
-------
MetaflowArtifacts
Container of all DataArtifacts produced by this task
"""
arts = list(self)
obj = namedtuple("MetaflowArtifacts", [art.id for art in arts])
return obj._make(arts)
@property
def successful(self) -> bool:
"""
Indicates whether or not the task completed successfully.
This information is always about the latest task to have completed (in case
of retries).
Returns
-------
bool
True if the task completed successfully and False otherwise
"""
try:
return self["_success"].data
except KeyError:
return False
@property
def finished(self) -> bool:
"""
Indicates whether or not the task completed.
This information is always about the latest task to have completed (in case
of retries).
Returns
-------
bool
True if the task completed and False otherwise
"""
try:
return self["_task_ok"].data
except KeyError:
return False
@property
def exception(self) -> Optional[Any]:
"""
Returns the exception that caused the task to fail, if any.
This information is always about the latest task to have completed (in case
of retries). If successful() returns False and finished() returns True,
this method can help determine what went wrong.
Returns
-------
object
Exception raised by the task or None if not applicable
"""
try:
return self["_exception"].data
except KeyError:
return None
@property
def finished_at(self) -> Optional[datetime]:
"""
Returns the datetime object of when the task finished (successfully or not).
This information is always about the latest task to have completed (in case
of retries). This call will return None if the task is not finished.
Returns
-------
datetime
Datetime of when the task finished
"""
try:
return self["_task_ok"].created_at
except KeyError:
return None
@property
def runtime_name(self) -> Optional[str]:
"""
Returns the name of the runtime this task executed on.
Returns
-------
str
Name of the runtime this task executed on
"""
for t in self._tags:
if t.startswith("runtime:"):
return t.split(":")[1]
return None
@property
def stdout(self) -> str:
"""
Returns the full standard out of this task.
If you specify a specific attempt for this task, it will return the
standard out for that attempt. If you do not specify an attempt,
this will return the current standard out for the latest *started*
attempt of the task. In both cases, multiple calls to this
method will return the most up-to-date log (so if an attempt is not
done, each call will fetch the latest log).
Returns
-------
str
Standard output of this task
"""
return self._load_log("stdout")
@property
def stdout_size(self) -> int:
"""
Returns the size of the stdout log of this task.
Similar to `stdout`, the size returned is the latest size of the log
(so for a running attempt, this value will increase as the task produces
more output).
Returns
-------
int
Size of the stdout log content (in bytes)
"""
return self._get_logsize("stdout")
@property
def stderr(self) -> str:
"""
Returns the full standard error of this task.
If you specify a specific attempt for this task, it will return the
standard error for that attempt. If you do not specify an attempt,
this will return the current standard error for the latest *started*
attempt. In both cases, multiple calls to this
method will return the most up-to-date log (so if an attempt is not
done, each call will fetch the latest log).
Returns
-------
str
Standard error of this task
"""
return self._load_log("stderr")
@property
def stderr_size(self) -> int:
"""
Returns the size of the stderr log of this task.
Similar to `stderr`, the size returned is the latest size of the log
(so for a running attempt, this value will increase as the task produces
more output).
Returns
-------
int
Size of the stderr log content (in bytes)
"""
return self._get_logsize("stderr")
@property
def current_attempt(self) -> int:
"""
Get the relevant attempt for this Task.
Returns the specific attempt used when
initializing the instance, or the latest *started* attempt for the Task.
Returns
-------
int
attempt id for this task object
"""
if self._attempt is not None:
attempt = self._attempt
else:
# It is possible that a task fails before any metadata has been
# recorded. In this case, we assume that we are executing the
# first attempt.
#
# FIXME: Technically we are looking at the latest *recorded* attempt
# here. It is possible that logs exists for a newer attempt that
# just failed to record metadata. We could make this logic more robust
# and guarantee that we always return the latest available log.
attempt = int(self.metadata_dict.get("attempt", 0))
return attempt
@cached_property
def code(self) -> Optional[MetaflowCode]:
"""
Returns the MetaflowCode object for this task, if present.
Not all tasks save their code so this call may return None in those cases.
Returns
-------
MetaflowCode
Code package for this task
"""
code_package = self.metadata_dict.get("code-package")
if code_package:
return MetaflowCode(self.path_components[0], code_package)
return None
@cached_property
def environment_info(self) -> Dict[str, Any]:
"""
Returns information about the environment that was used to execute this task. As an
example, if the Conda environment is selected, this will return information about the
dependencies that were used in the environment.
This environment information is only available for tasks that have a code package.
Returns
-------
Dict
Dictionary describing the environment
"""
my_code = self.code
if not my_code:
return None
env_type = my_code.info["environment_type"]
if not env_type:
return None
env = [m for m in ENVIRONMENTS + [MetaflowEnvironment] if m.TYPE == env_type][0]
meta_dict = self.metadata_dict
return env.get_client_info(self.path_components[0], meta_dict)
def _load_log(self, stream):
meta_dict = self.metadata_dict
log_location = meta_dict.get("log_location_%s" % stream)
if log_location:
return self._load_log_legacy(log_location, stream)
else:
return "".join(
line + "\n" for _, line in self.loglines(stream, meta_dict=meta_dict)
)
def _get_logsize(self, stream):
meta_dict = self.metadata_dict
log_location = meta_dict.get("log_location_%s" % stream)
if log_location:
return self._legacy_log_size(log_location, stream)
else:
return self._log_size(stream, meta_dict)
def loglines(
self,
stream: str,
as_unicode: bool = True,
meta_dict: Optional[Dict[str, Any]] = None,
) -> Iterator[Tuple[datetime, str]]:
"""
Return an iterator over (utc_timestamp, logline) tuples.
Parameters
----------
stream : str
Either 'stdout' or 'stderr'.
as_unicode : bool, default: True
If as_unicode=False, each logline is returned as a byte object. Otherwise,
it is returned as a (unicode) string.
Yields
------
Tuple[datetime, str]
Tuple of timestamp, logline pairs.
"""
from metaflow.mflog.mflog import merge_logs
global filecache
if meta_dict is None:
meta_dict = self.metadata_dict
ds_type = meta_dict.get("ds-type")
ds_root = meta_dict.get("ds-root")
if ds_type is None or ds_root is None:
yield None, ""
return
if filecache is None:
filecache = FileCache()
attempt = self.current_attempt
logs = filecache.get_logs_stream(
ds_type, ds_root, stream, attempt, *self.path_components
)
for line in merge_logs([blob for _, blob in logs]):
msg = to_unicode(line.msg) if as_unicode else line.msg
yield line.utc_tstamp, msg
def _load_log_legacy(self, log_location, logtype, as_unicode=True):
# this function is used to load pre-mflog style logfiles
global filecache
log_info = json.loads(log_location)
location = log_info["location"]
ds_type = log_info["ds_type"]
attempt = log_info["attempt"]
if filecache is None:
filecache = FileCache()
ret_val = filecache.get_log_legacy(
ds_type, location, logtype, int(attempt), *self.path_components
)
if as_unicode and (ret_val is not None):
return ret_val.decode(encoding="utf8")
else:
return ret_val
def _legacy_log_size(self, log_location, logtype):
global filecache
log_info = json.loads(log_location)
location = log_info["location"]
ds_type = log_info["ds_type"]
attempt = log_info["attempt"]
if filecache is None:
filecache = FileCache()
return filecache.get_legacy_log_size(
ds_type, location, logtype, int(attempt), *self.path_components
)
def _log_size(self, stream, meta_dict):
global filecache
ds_type = meta_dict.get("ds-type")
ds_root = meta_dict.get("ds-root")
if ds_type is None or ds_root is None:
return 0
if filecache is None:
filecache = FileCache()
attempt = self.current_attempt
return filecache.get_log_size(
ds_type, ds_root, stream, attempt, *self.path_components
)
def __iter__(self) -> Iterator[DataArtifact]:
"""
Iterate over all children DataArtifact of this Task
Yields
------
DataArtifact
A DataArtifact in this Step
"""
for d in super(Task, self).__iter__():
yield d
def __getitem__(self, name: str) -> DataArtifact:
"""
Returns the DataArtifact object with the artifact name 'name'
Parameters
----------
name : str
Data artifact name
Returns
-------
DataArtifact
DataArtifact for this artifact name in this task
Raises
------
KeyError
If the name does not identify a valid DataArtifact object
"""
return super(Task, self).__getitem__(name)
def __getstate__(self):
return super(Task, self).__getstate__()
def __setstate__(self, state):
super(Task, self).__setstate__(state)
class Step(MetaflowObject):
    """
    A `Step` represents a user-defined step, that is, a method annotated with the `@step` decorator.

    It contains `Task` objects associated with the step, that is, all executions of the
    `Step`. The step may contain multiple `Task`s in the case of a foreach step.

    Attributes
    ----------
    task : Task
        The first `Task` object in this step. This is a shortcut for retrieving the only
        task contained in a non-foreach step.
    finished_at : datetime
        Time when the latest `Task` of this step finished. Note that in the case of foreaches,
        this time may change during execution of the step.
    environment_info : Dict[str, Any]
        Information about the execution environment.
    """

    _NAME = "step"
    _PARENT_CLASS = "run"
    _CHILD_CLASS = "task"

    @property
    def task(self) -> Optional[Task]:
        """
        Returns a Task object belonging to this step.

        This is useful when the step only contains one task (a linear step for example).

        Returns
        -------
        Task, optional
            The first task produced when iterating this step, or None if
            the iteration yields nothing
        """
        for t in self:
            return t
        return None

    def tasks(self, *tags: str) -> Iterable[Task]:
        """
        [Legacy function - do not use]

        Returns an iterator over all `Task` objects in the step. This is an alias
        to iterating the object itself, i.e.
        ```
        list(Step(...)) == list(Step(...).tasks())
        ```

        Parameters
        ----------
        tags : str
            No op (legacy functionality)

        Yields
        ------
        Task
            `Task` objects in this step.
        """
        return self._filtered_children(*tags)

    @property
    def control_task(self) -> Optional[Task]:
        """
        [Unpublished API - use with caution!]

        Returns a Control Task object belonging to this step.
        This is useful when the step only contains one control task.

        Returns
        -------
        Task
            A control task in the step
        """
        return next(self.control_tasks(), None)

    def control_tasks(self, *tags: str) -> Iterator[Task]:
        """
        [Unpublished API - use with caution!]

        Returns an iterator over all the control tasks in the step.
        An optional filter is available that allows you to filter on tags. The
        control tasks returned if the filter is specified will contain all the
        tags specified.

        Each control task is yielded at most once, even when several of its
        metadata records carry the control-task marker.

        Parameters
        ----------
        tags : str
            Tags to match

        Yields
        ------
        Task
            Control Task objects for this step
        """
        children = super(Step, self).__iter__()
        for child in children:
            # first filter by standard tag filters
            if not all(tag in child.tags for tag in tags):
                continue
            # Then look for control task indicator in one of two ways
            # Look in tags - this path will activate for metadata service
            # backends that pre-date tag mutation release
            if CONTROL_TASK_TAG in child.tags:
                yield child
            # Look in task metadata. `any` stops at the first matching record,
            # so several matching records cannot yield the same task twice.
            elif any(
                task_metadata.name == "internal_task_type"
                and task_metadata.value == CONTROL_TASK_TAG
                for task_metadata in child.metadata
            ):
                yield child

    def __iter__(self) -> Iterator[Task]:
        """
        Iterate over all children Task of this Step

        Yields
        ------
        Task
            A Task in this Step
        """
        for t in super(Step, self).__iter__():
            yield t

    def __getitem__(self, task_id: str) -> Task:
        """
        Returns the Task object with the task ID 'task_id'

        Parameters
        ----------
        task_id : str
            Task ID

        Returns
        -------
        Task
            Task for this task ID in this Step

        Raises
        ------
        KeyError
            If the task_id does not identify a valid Task object
        """
        return super(Step, self).__getitem__(task_id)

    def __getstate__(self):
        return super(Step, self).__getstate__()

    def __setstate__(self, state):
        super(Step, self).__setstate__(state)

    @property
    def finished_at(self) -> Optional[datetime]:
        """
        Returns the datetime object of when the step finished (successfully or not).

        A step is considered finished when all the tasks that belong to it have
        finished. This call will return None if the step has not finished

        Returns
        -------
        datetime
            Datetime of when the step finished
        """
        try:
            # `default=None` covers a step with no tasks to iterate, where a
            # bare max() would raise ValueError.
            return max((task.finished_at for task in self), default=None)
        except TypeError:
            # Raised if None is present in max
            return None

    @property
    def environment_info(self) -> Optional[Dict[str, Any]]:
        """
        Returns information about the environment that was used to execute this step. As an
        example, if the Conda environment is selected, this will return information about the
        dependencies that were used in the environment.

        This environment information is only available for steps that have tasks
        for which the code package has been saved.

        Returns
        -------
        Dict[str, Any], optional
            Dictionary describing the environment
        """
        # All tasks have the same environment info so just use the first one
        for t in self:
            return t.environment_info
        return None

    @property
    def parent_steps(self) -> Iterator["Step"]:
        """
        Yields parent steps for the current step.

        Yields
        ------
        Step
            Parent step
        """
        if self.id == "start":
            # Nothing is yielded for `start`, so skip loading the graph.
            return
        graph_info = self.task["_graph_info"].data
        flow, run, _ = self.path_components
        for node_name, attributes in graph_info["steps"].items():
            if self.id in attributes["next"]:
                yield Step(f"{flow}/{run}/{node_name}", _namespace_check=False)

    @property
    def child_steps(self) -> Iterator["Step"]:
        """
        Yields child steps for the current step.

        Yields
        ------
        Step
            Child step
        """
        if self.id == "end":
            # Nothing is yielded for `end`, so skip loading the graph.
            return
        graph_info = self.task["_graph_info"].data
        flow, run, _ = self.path_components
        for next_step in graph_info["steps"][self.id]["next"]:
            yield Step(f"{flow}/{run}/{next_step}", _namespace_check=False)
class Run(MetaflowObject):
    """
    A `Run` represents an execution of a `Flow`. It is a container of `Step`s.

    Attributes
    ----------
    data : MetaflowData
        a shortcut to run['end'].task.data, i.e. data produced by this run.
    successful : bool
        True if the run completed successfully.
    finished : bool
        True if the run completed.
    finished_at : datetime
        Time this run finished.
    code : MetaflowCode
        Code package for this run (if present). See `MetaflowCode`.
    trigger : MetaflowTrigger
        Information about event(s) that triggered this run (if present). See `MetaflowTrigger`.
    end_task : Task
        `Task` for the end step (if it is present already).
    """

    _NAME = "run"
    _PARENT_CLASS = "flow"
    _CHILD_CLASS = "step"

    def _iter_filter(self, x):
        # exclude _parameters step
        return x.id[0] != "_"

    def steps(self, *tags: str) -> Iterator[Step]:
        """
        [Legacy function - do not use]

        Returns an iterator over all `Step` objects in the run. This is an alias
        to iterating the object itself, i.e.
        ```
        list(Run(...)) == list(Run(...).steps())
        ```

        Parameters
        ----------
        tags : str
            No op (legacy functionality)

        Yields
        ------
        Step
            `Step` objects in this run.
        """
        return self._filtered_children(*tags)

    @property
    def code(self) -> Optional[MetaflowCode]:
        """
        Returns the MetaflowCode object for this run, if present.

        Code is packed if atleast one `Step` runs remotely, else None is returned.

        Returns
        -------
        MetaflowCode, optional
            Code package for this run
        """
        # Note that this can be quite slow in the edge-case where the codepackage is only available
        # for the last step on the list. Steps are reverse-ordered, so the worst-case scenario is
        # if the start step executes remotely and every step after that is remote.
        #
        # TODO: A more optimized way of figuring out if a run has remote steps (and thus a codepackage) available.
        # This might require changes to the metadata-service as well.
        for step in self:
            # `step.task` iterates the step on every access, so fetch it once.
            task = step.task
            if task:
                code = task.code
                if code:
                    return code
        return None

    @property
    def data(self) -> Optional[MetaflowData]:
        """
        Returns a container of data artifacts produced by this run.

        You can access data produced by this run as follows:
        ```
        print(run.data.my_var)
        ```
        This is a shorthand for `run['end'].task.data`. If the 'end' step has not yet
        executed, returns None.

        Returns
        -------
        MetaflowData, optional
            Container of all artifacts produced by this task
        """
        end = self.end_task
        if end:
            return end.data
        return None

    @property
    def successful(self) -> bool:
        """
        Indicates whether or not the run completed successfully.

        A run is successful if its 'end' step is successful.

        Returns
        -------
        bool
            True if the run completed successfully and False otherwise
        """
        end = self.end_task
        if end:
            return end.successful
        else:
            return False

    @property
    def finished(self) -> bool:
        """
        Indicates whether or not the run completed.

        A run completed if its 'end' step completed.

        Returns
        -------
        bool
            True if the run completed and False otherwise
        """
        end = self.end_task
        if end:
            return end.finished
        else:
            return False

    @property
    def finished_at(self) -> Optional[datetime]:
        """
        Returns the datetime object of when the run finished (successfully or not).

        The completion time of a run is the same as the completion time of its 'end' step.
        If the 'end' step has not completed, returns None.

        Returns
        -------
        datetime, optional
            Datetime of when the run finished
        """
        end = self.end_task
        if end:
            return end.finished_at
        return None

    @property
    def end_task(self) -> Optional[Task]:
        """
        Returns the Task corresponding to the 'end' step.

        This returns None if the end step does not yet exist.

        Returns
        -------
        Task, optional
            The 'end' task
        """
        try:
            end_step = self["end"]
        except KeyError:
            return None

        return end_step.task

    def add_tag(self, tag: str):
        """
        Add a tag to this `Run`.

        Note that if the tag is already a system tag, it is not added as a user tag,
        and no error is thrown.

        Parameters
        ----------
        tag : str
            Tag to add.
        """
        # For backwards compatibility with Netflix's early version of this functionality,
        # this function shall accept both an individual tag AND iterables of tags.
        #
        # Iterable of tags support shall be removed in future once existing
        # usage has been migrated off.
        if is_stringish(tag):
            tag = [tag]
        return self.replace_tag([], tag)

    def add_tags(self, tags: Iterable[str]):
        """
        Add one or more tags to this `Run`.

        Note that if any tag is already a system tag, it is not added as a user tag
        and no error is thrown.

        Parameters
        ----------
        tags : Iterable[str]
            Tags to add.
        """
        return self.replace_tag([], tags)

    def remove_tag(self, tag: str):
        """
        Remove one tag from this `Run`.

        Removing a system tag is an error. Removing a non-existent
        user tag is a no-op.

        Parameters
        ----------
        tag : str
            Tag to remove.
        """
        # For backwards compatibility with Netflix's early version of this functionality,
        # this function shall accept both an individual tag AND iterables of tags.
        #
        # Iterable of tags support shall be removed in future once existing
        # usage has been migrated off.
        if is_stringish(tag):
            tag = [tag]
        return self.replace_tag(tag, [])

    def remove_tags(self, tags: Iterable[str]):
        """
        Remove one or more tags from this `Run`.

        Removing a system tag will result in an error. Removing a non-existent
        user tag is a no-op.

        Parameters
        ----------
        tags : Iterable[str]
            Tags to remove.
        """
        return self.replace_tags(tags, [])

    def replace_tag(self, tag_to_remove: str, tag_to_add: str):
        """
        Remove a tag and add a tag atomically. Removal is done first.
        The rules for `Run.add_tag` and `Run.remove_tag` also apply here.

        Parameters
        ----------
        tag_to_remove : str
            Tag to remove.
        tag_to_add : str
            Tag to add.
        """
        # For backwards compatibility with Netflix's early version of this functionality,
        # this function shall accept both individual tags AND iterables of tags.
        #
        # Iterable of tags support shall be removed in future once existing
        # usage has been migrated off.
        if is_stringish(tag_to_remove):
            tag_to_remove = [tag_to_remove]
        if is_stringish(tag_to_add):
            tag_to_add = [tag_to_add]
        return self.replace_tags(tag_to_remove, tag_to_add)

    def replace_tags(self, tags_to_remove: Iterable[str], tags_to_add: Iterable[str]):
        """
        Remove and add tags atomically; the removal is done first.
        The rules for `Run.add_tag` and `Run.remove_tag` also apply here.

        Parameters
        ----------
        tags_to_remove : Iterable[str]
            Tags to remove.
        tags_to_add : Iterable[str]
            Tags to add.
        """
        flow_id = self.path_components[0]
        final_user_tags = self._metaflow.metadata.mutate_user_tags_for_run(
            flow_id, self.id, tags_to_remove=tags_to_remove, tags_to_add=tags_to_add
        )
        # refresh Run object with the latest tags
        self._user_tags = frozenset(final_user_tags)
        self._tags = frozenset([*self._user_tags, *self._system_tags])

    def __iter__(self) -> Iterator[Step]:
        """
        Iterate over all children Step of this Run

        Yields
        ------
        Step
            A Step in this Run
        """
        for s in super(Run, self).__iter__():
            yield s

    def __getitem__(self, name: str) -> Step:
        """
        Returns the Step object with the step name 'name'

        Parameters
        ----------
        name : str
            Step name

        Returns
        -------
        Step
            Step for this step name in this Run

        Raises
        ------
        KeyError
            If the name does not identify a valid Step object
        """
        return super(Run, self).__getitem__(name)

    def __getstate__(self):
        return super(Run, self).__getstate__()

    def __setstate__(self, state):
        super(Run, self).__setstate__(state)

    @property
    def trigger(self) -> Optional[Trigger]:
        """
        Returns a container of events that triggered this run.

        This returns None if the run was not triggered by any events.

        Returns
        -------
        Trigger, optional
            Container of triggering events
        """
        if "start" in self:
            # Resolve the start task once rather than on every use.
            start_task = self["start"].task
            if start_task:
                meta = start_task.metadata_dict.get("execution-triggers")
                if meta:
                    return Trigger(json.loads(meta))
        return None
class Flow(MetaflowObject):
    """
    A Flow represents all existing flows with a certain name, in other words,
    classes derived from `FlowSpec`. A container of `Run` objects.

    Attributes
    ----------
    latest_run : Run
        Latest `Run` (in progress or completed, successfully or not) of this flow.
    latest_successful_run : Run
        Latest successfully completed `Run` of this flow.
    """

    _NAME = "flow"
    _PARENT_CLASS = None
    _CHILD_CLASS = "run"

    def __init__(self, *args, **kwargs):
        super(Flow, self).__init__(*args, **kwargs)

    @property
    def latest_run(self) -> Optional[Run]:
        """
        Returns the latest run (either in progress or completed) of this flow.

        Note that an in-progress run may be returned by this call. Use latest_successful_run
        to get an object representing a completed successful run.

        Returns
        -------
        Run, optional
            Latest run of this flow
        """
        # The first run produced by iteration is taken as the latest one.
        for run in self:
            return run
        return None

    @property
    def latest_successful_run(self) -> Optional[Run]:
        """
        Returns the latest successful run of this flow.

        Returns
        -------
        Run, optional
            Latest successful run of this flow
        """
        for run in self:
            if run.successful:
                return run
        return None

    def runs(self, *tags: str) -> Iterator[Run]:
        """
        Returns an iterator over all `Run`s of this flow.

        An optional filter is available that allows you to filter on tags.
        If multiple tags are specified, only runs that have all the
        specified tags are returned.

        Parameters
        ----------
        tags : str
            Tags to match.

        Yields
        ------
        Run
            `Run` objects in this flow.
        """
        return self._filtered_children(*tags)

    def __iter__(self) -> Iterator[Run]:
        """
        Iterate over all children Run of this Flow.

        Note that only runs in the current namespace are returned unless
        _namespace_check is False

        Yields
        ------
        Run
            A Run in this Flow
        """
        for r in super(Flow, self).__iter__():
            yield r

    def __getitem__(self, run_id: str) -> Run:
        """
        Returns the Run object with the run ID 'run_id'

        Parameters
        ----------
        run_id : str
            Run ID

        Returns
        -------
        Run
            Run for this run ID in this Flow

        Raises
        ------
        KeyError
            If the run_id does not identify a valid Run object
        """
        return super(Flow, self).__getitem__(run_id)

    def __getstate__(self):
        return super(Flow, self).__getstate__()

    def __setstate__(self, state):
        super(Flow, self).__setstate__(state)
class Metaflow(object):
    """
    Entry point to all objects in the Metaflow universe.

    Flows can be listed either through the `flows` property or by iterating
    over this object directly.

    Attributes
    ----------
    flows : List[Flow]
        Returns the list of all `Flow` objects known to this metadata provider. Note that only
        flows present in the current namespace will be returned. A `Flow` is present in a namespace
        if it has at least one run in the namespace.
    """

    def __init__(self, _current_metadata: Optional[str] = None):
        if not _current_metadata:
            # No explicit provider string: fall back to the module-level
            # provider, initializing it first when it has not been set yet.
            if current_metadata is False:
                default_metadata()
            self.metadata = current_metadata
            return

        provider, info = _metadata(_current_metadata)
        self.metadata = provider
        if info:
            self.metadata.INFO = info

    @property
    def flows(self) -> List[Flow]:
        """
        Returns a list of all the flows present.

        Only flows present in the set namespace are returned. A flow is present in a namespace if
        it has at least one run that is in the namespace.

        Returns
        -------
        List[Flow]
            List of all flows present.
        """
        return [flow for flow in self]

    def __iter__(self) -> Iterator[Flow]:
        """
        Iterator over all flows present.

        Only flows present in the set namespace are returned. A flow is present in a
        namespace if it has at least one run that is in the namespace.

        Yields
        -------
        Flow
            A Flow present in the Metaflow universe.
        """
        # We do not filter on namespace in the request because
        # filtering on namespace on flows means finding at least one
        # run in this namespace. This is_in_namespace() function
        # does this properly in this case
        known_flows = self.metadata.get_object("root", "flow", None, None) or []
        for flow_obj in known_flows:
            try:
                yield Flow(_object=flow_obj, _metaflow=self)
            except MetaflowNamespaceMismatch:
                # Flows outside the current namespace are silently skipped.
                continue

    def __str__(self) -> str:
        return "Metaflow()"

    def __getitem__(self, name: str) -> Flow:
        """
        Returns a specific flow by name.

        The flow will only be returned if it is present in the current namespace.

        Parameters
        ----------
        name : str
            Name of the Flow

        Returns
        -------
        Flow
            Flow with the given name.
        """
        flow = Flow(name, _metaflow=self)
        return flow
def _metadata(ms: str) -> Tuple[Optional["MetadataProvider"], Optional[str]]:
    """
    Resolve a metadata specification string into a provider and its info string.

    `ms` is either "<type>@<info>" / "<type>" where <type> matches the TYPE of
    one of METADATA_PROVIDERS, or a bare location. In the latter case the
    "service" provider is selected when `ms` starts with "http" and the
    "local" provider otherwise, and `ms` itself is returned as the info.

    Returns
    -------
    Tuple[Optional[MetadataProvider], Optional[str]]
        (provider, info); (None, None) when no provider of the deduced type exists.
    """
    parts = ms.split("@", 1)
    explicit = next((p for p in METADATA_PROVIDERS if p.TYPE == parts[0]), None)
    if explicit is not None:
        return explicit, (parts[1] if len(parts) > 1 else None)

    # Deduce from ms; if starts with http, use service or else use local
    deduced_type = "service" if ms.startswith("http") else "local"
    deduced = next((p for p in METADATA_PROVIDERS if p.TYPE == deduced_type), None)
    if deduced is None:
        return None, None
    return deduced, ms
# Register each client class in _CLASSES under its type name; the keys match
# the _NAME / _CHILD_CLASS strings used by the classes (e.g. Flow._NAME is
# "flow" and Flow._CHILD_CLASS is "run").
_CLASSES["flow"] = Flow
_CLASSES["run"] = Run
_CLASSES["step"] = Step
_CLASSES["task"] = Task
_CLASSES["artifact"] = DataArtifact
================================================
FILE: metaflow/client/filecache.py
================================================
from __future__ import print_function
from collections import OrderedDict
import json
import os
import sys
import time
from tempfile import NamedTemporaryFile
from hashlib import sha1
from urllib.parse import urlparse
from metaflow.datastore import FlowDataStore
from metaflow.datastore.content_addressed_store import BlobCache
from metaflow.datastore.flow_datastore import MetadataCache
from metaflow.exception import MetaflowException
from metaflow.metaflow_config import (
CLIENT_CACHE_PATH,
CLIENT_CACHE_MAX_SIZE,
CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT,
)
from metaflow.metaflow_profile import from_start
from metaflow.plugins import DATASTORES
# Files younger than this many seconds are never garbage collected
# (see FileCache._garbage_collect).
NEW_FILE_QUARANTINE = 10

# Compare the version as a tuple: checking major and minor independently
# would wrongly pick the slow fallback for e.g. a hypothetical 4.0.
if sys.version_info >= (3, 2):

    def od_move_to_end(od, key):
        """Move `key` to the end of the OrderedDict `od`."""
        od.move_to_end(key)

else:
    # Not very efficient but works and most people are on 3.2+
    def od_move_to_end(od, key):
        """Move `key` to the end of the OrderedDict `od` by re-inserting it."""
        v = od.get(key)
        del od[key]
        od[key] = v
class FileCacheException(MetaflowException):
    """Error raised by the client file cache (e.g. a cache directory cannot be created)."""

    headline = "File cache error"
class FileCache(object):
    """
    Local on-disk cache for blobs, legacy logs and task metadata used by the client.

    Cached files are stored as
    `<cache_dir>/<cache id>/<two-character prefix>/<name>.cached` or `.blob`.
    The cache keeps a running total of the sizes of the files it knows about
    and removes the oldest ones once that total exceeds `max_size` MiB.

    Parameters
    ----------
    cache_dir : str, optional
        Root directory of the cache; defaults to CLIENT_CACHE_PATH.
    max_size : int, optional
        Maximum cache size in MiB; defaults to CLIENT_CACHE_MAX_SIZE.
    """

    def __init__(self, cache_dir=None, max_size=None):
        self._cache_dir = cache_dir
        self._max_size = max_size
        if self._cache_dir is None:
            self._cache_dir = CLIENT_CACHE_PATH
        if self._max_size is None:
            self._max_size = int(CLIENT_CACHE_MAX_SIZE)
        # Sum of the sizes (bytes) of all indexed objects.
        self._total = 0
        # List of (ctime, size, path) tuples, oldest first; None until the
        # cache directory has been indexed (done lazily on first write).
        self._objects = None
        # We have a separate blob_cache per flow and datastore type.
        self._blob_caches = {}
        # We also keep a cache for FlowDataStore objects because some of them
        # may have long-lived persistent connections; this is purely a
        # performance optimization. Uses OrderedDict to implement a kind of LRU
        # cache and keep only a certain number of these caches around.
        self._store_caches = OrderedDict()
        # We also keep a cache of data_metadata for TaskDatastore. This is used
        # when querying for sizes of artifacts. Once we have queried for the size
        # of one artifact in a TaskDatastore, caching this means that any
        # queries on that same TaskDatastore will be quick (since we already
        # have all the metadata). We keep track of this in a file so it persists
        # across processes.

    @property
    def cache_dir(self):
        """Root directory of this cache."""
        return self._cache_dir

    def get_logs_stream(
        self, ds_type, ds_root, stream, attempt, flow_name, run_id, step_name, task_id
    ):
        """Load the logs of `stream` for the given task attempt from its task datastore."""
        from metaflow.mflog import LOG_SOURCES

        ds = self._get_flow_datastore(ds_type, ds_root, flow_name)
        task_ds = ds.get_task_datastore(
            run_id, step_name, task_id, data_metadata={"objects": {}, "info": {}}
        )
        return task_ds.load_logs(LOG_SOURCES, stream, attempt_override=attempt)

    def get_log_legacy(
        self, ds_type, location, logtype, attempt, flow_name, run_id, step_name, task_id
    ):
        """Return a legacy log, serving it from the file cache when already present."""
        ds_cls = self._get_datastore_storage_impl(ds_type)
        ds_root = ds_cls.path_join(*ds_cls.path_split(location)[:-5])
        cache_id = self.flow_ds_id(ds_type, ds_root, flow_name)

        token = (
            "%s.cached"
            % sha1(
                os.path.join(run_id, step_name, task_id, "%s_log" % logtype).encode(
                    "utf-8"
                )
            ).hexdigest()
        )
        path = os.path.join(self._cache_dir, cache_id, token[:2], token)

        cached_log = self.read_file(path)
        if cached_log is not None:
            return cached_log

        ds = self._get_flow_datastore(ds_type, ds_root, flow_name)
        task_ds = ds.get_task_datastore(
            run_id, step_name, task_id, data_metadata={"objects": {}, "info": {}}
        )
        log = task_ds.load_log_legacy(logtype, attempt_override=attempt)
        # Store this in the file cache as well
        self.create_file(path, log)
        return log

    def get_legacy_log_size(
        self, ds_type, location, logtype, attempt, flow_name, run_id, step_name, task_id
    ):
        """Return the size of a legacy log as reported by the task datastore."""
        ds_cls = self._get_datastore_storage_impl(ds_type)
        ds_root = ds_cls.path_join(*ds_cls.path_split(location)[:-5])
        ds = self._get_flow_datastore(ds_type, ds_root, flow_name)
        task_ds = ds.get_task_datastore(
            run_id,
            step_name,
            task_id,
            attempt=attempt,
            data_metadata={"objects": {}, "info": {}},
        )
        return task_ds.get_legacy_log_size(logtype)

    def get_log_size(
        self, ds_type, ds_root, logtype, attempt, flow_name, run_id, step_name, task_id
    ):
        """Return the size of the `logtype` log as reported by the task datastore."""
        from metaflow.mflog import LOG_SOURCES

        ds = self._get_flow_datastore(ds_type, ds_root, flow_name)
        task_ds = ds.get_task_datastore(
            run_id,
            step_name,
            task_id,
            attempt=attempt,
            data_metadata={"objects": {}, "info": {}},
        )
        return task_ds.get_log_size(LOG_SOURCES, logtype)

    def get_data(self, ds_type, flow_name, location, key):
        """Load the raw data stored under `key` in the datastore derived from `location`."""
        ds_cls = self._get_datastore_storage_impl(ds_type)
        ds_root = ds_cls.get_datastore_root_from_location(location, flow_name)
        ds = self._get_flow_datastore(ds_type, ds_root, flow_name)
        return next(ds.load_data([key], force_raw=True))

    def get_artifact_size_by_location(
        self, ds_type, location, attempt, flow_name, run_id, step_name, task_id, name
    ):
        """Gets the size of the artifact content (in bytes) for the name at the location"""
        ds_cls = self._get_datastore_storage_impl(ds_type)
        ds_root = ds_cls.get_datastore_root_from_location(location, flow_name)
        return self.get_artifact_size(
            ds_type, ds_root, attempt, flow_name, run_id, step_name, task_id, name
        )

    def get_artifact_size(
        self, ds_type, ds_root, attempt, flow_name, run_id, step_name, task_id, name
    ):
        """Gets the size of the artifact content (in bytes) for the name"""
        task_ds = self._get_task_datastore(
            ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt
        )
        _, size = next(task_ds.get_artifact_sizes([name]))
        return size

    def get_artifact_by_location(
        self,
        ds_type,
        location,
        data_metadata,
        flow_name,
        run_id,
        step_name,
        task_id,
        name,
    ):
        """Load the artifact `name`, deriving the datastore root from `location`."""
        ds_cls = self._get_datastore_storage_impl(ds_type)
        ds_root = ds_cls.get_datastore_root_from_location(location, flow_name)
        return self.get_artifact(
            ds_type, ds_root, data_metadata, flow_name, run_id, step_name, task_id, name
        )

    def get_artifact(
        self,
        ds_type,
        ds_root,
        data_metadata,
        flow_name,
        run_id,
        step_name,
        task_id,
        name,
    ):
        """Load and return the single artifact `name` of the given task."""
        _, obj = next(
            self.get_artifacts(
                ds_type,
                ds_root,
                data_metadata,
                flow_name,
                run_id,
                step_name,
                task_id,
                [name],
            )
        )
        return obj

    def get_all_artifacts(
        self, ds_type, ds_root, data_metadata, flow_name, run_id, step_name, task_id
    ):
        """Load every artifact listed by the task datastore of the given task."""
        ds = self._get_flow_datastore(ds_type, ds_root, flow_name)

        # We get the task datastore for this task
        task_ds = ds.get_task_datastore(
            run_id, step_name, task_id, data_metadata=data_metadata
        )
        # This will reuse the blob cache if needed. We do not have an
        # artifact cache so the unpickling happens every time here.
        return task_ds.load_artifacts([n for n, _ in task_ds.items()])

    def get_artifacts(
        self,
        ds_type,
        ds_root,
        data_metadata,
        flow_name,
        run_id,
        step_name,
        task_id,
        names,
    ):
        """Load the artifacts in `names` for the given task."""
        ds = self._get_flow_datastore(ds_type, ds_root, flow_name)

        # We get the task datastore for this task
        task_ds = ds.get_task_datastore(
            run_id, step_name, task_id, data_metadata=data_metadata
        )
        # note that load_artifacts uses flow_datastore.castore which goes
        # through one of the self._blob_cache
        return task_ds.load_artifacts(names)

    def create_file(self, path, value):
        """
        Write the bytes `value` to `path` and register the file with the cache.

        The data is first written to a temporary file in the destination
        directory and then renamed into place. Garbage collection runs afterwards.

        Raises
        ------
        FileCacheException
            If the destination directory cannot be created.
        """
        if self._objects is None:
            # Index objects lazily (when we first need to write to it).
            # This can be an expensive operation
            self._index_objects()
        dirname = os.path.dirname(path)
        try:
            FileCache._makedirs(dirname)
        except:  # noqa E722
            raise FileCacheException("Could not create directory: %s" % dirname)
        tmpfile = NamedTemporaryFile(dir=dirname, prefix="dlobj", delete=False)
        # Now write out the file
        try:
            # Close the handle before renaming: this releases the descriptor
            # (previously leaked) and flushes the data; renaming a file that
            # is still open also fails on some platforms.
            with tmpfile:
                tmpfile.write(value)
            os.rename(tmpfile.name, path)
        except:  # noqa E722
            os.unlink(tmpfile.name)
            raise
        size = os.path.getsize(path)
        self._total += size
        self._objects.append((int(time.time()), size, path))
        self._garbage_collect()

    def read_file(self, path):
        """Return the bytes stored at `path`, or None if the file cannot be read."""
        if os.path.exists(path):
            try:
                with open(path, "rb") as f:
                    return f.read()
            except IOError:
                # It may have been concurrently garbage collected by another
                # process
                pass
        return None

    def _index_objects(self):
        """Scan the cache directory and rebuild `_objects` (oldest first) and `_total`."""
        objects = []
        if os.path.exists(self._cache_dir):
            for flow_ds_id in os.listdir(self._cache_dir):
                root = os.path.join(self._cache_dir, flow_ds_id)
                if not os.path.isdir(root):
                    continue
                for subdir in os.listdir(root):
                    subdir_path = os.path.join(self._cache_dir, flow_ds_id, subdir)
                    if not os.path.isdir(subdir_path):
                        continue
                    for obj in os.listdir(subdir_path):
                        _, ext = os.path.splitext(obj)
                        # os.path.splitext keeps the leading dot in the
                        # extension, so compare against ".cached"/".blob".
                        if ext in (".cached", ".blob"):
                            path = os.path.join(subdir_path, obj)
                            try:
                                entry = (
                                    os.path.getctime(path),
                                    os.path.getsize(path),
                                    path,
                                )
                            except OSError:
                                # The file may have been garbage collected by
                                # another process since it was listed.
                                continue
                            objects.append(entry)

        self._total = sum(size for _, size, _ in objects)
        self._objects = sorted(objects, reverse=False)

    @staticmethod
    def flow_ds_id(ds_type, ds_root, flow_name):
        """Build the cache id `<ds_type>.<sanitized root>.<flow_name>`."""
        p = urlparse(ds_root)
        sanitized_root = (p.netloc + p.path).replace("/", "_")
        return ".".join([ds_type, sanitized_root, flow_name])

    @staticmethod
    def task_ds_id(ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt):
        """Build the cache id identifying one attempt of one task."""
        p = urlparse(ds_root)
        sanitized_root = (p.netloc + p.path).replace("/", "_")
        return ".".join(
            [
                ds_type,
                sanitized_root,
                flow_name,
                run_id,
                step_name,
                task_id,
                str(attempt),
            ]
        )

    def _garbage_collect(self):
        """Delete the oldest indexed files while the cache exceeds `max_size` MiB."""
        now = time.time()
        while self._objects and self._total > self._max_size * 1024**2:
            # Stop as soon as the oldest remaining file is still in quarantine.
            if now - self._objects[0][0] < NEW_FILE_QUARANTINE:
                break
            ctime, size, path = self._objects.pop(0)
            self._total -= size
            try:
                os.remove(path)
            except OSError:
                # maybe another client had already GC'ed the file away
                pass

    @staticmethod
    def _makedirs(path):
        """Create `path` (and parents), ignoring the error raised when it already exists."""
        try:
            os.makedirs(path)
        except FileExistsError:
            # FileExistsError is the OSError subclass for errno EEXIST (17).
            return

    @staticmethod
    def _get_datastore_storage_impl(ds_type):
        """Return the first entry of DATASTORES whose TYPE equals `ds_type`."""
        storage_impl = [d for d in DATASTORES if d.TYPE == ds_type]
        if len(storage_impl) == 0:
            raise FileCacheException("Datastore %s was not found" % ds_type)
        return storage_impl[0]

    def _get_flow_datastore(self, ds_type, ds_root, flow_name):
        """Return a FlowDataStore for the given flow, reusing a cached one when available."""
        cache_id = self.flow_ds_id(ds_type, ds_root, flow_name)
        cached_flow_datastore = self._store_caches.get(cache_id)

        if cached_flow_datastore:
            od_move_to_end(self._store_caches, cache_id)
            return cached_flow_datastore
        else:
            storage_impl = self._get_datastore_storage_impl(ds_type)
            cached_flow_datastore = FlowDataStore(
                flow_name=flow_name,
                environment=None,  # TODO: Add environment here
                storage_impl=storage_impl,
                ds_root=ds_root,
            )
            blob_cache = self._blob_caches.setdefault(
                cache_id,
                (
                    FileBlobCache(self, cache_id),
                    TaskMetadataCache(self, ds_type, ds_root, flow_name),
                ),
            )
            cached_flow_datastore.ca_store.set_blob_cache(blob_cache[0])
            cached_flow_datastore.set_metadata_cache(blob_cache[1])
            self._store_caches[cache_id] = cached_flow_datastore
            if len(self._store_caches) > CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT:
                # Evict the least recently used datastore and its caches.
                cache_id_to_remove, _ = self._store_caches.popitem(last=False)
                del self._blob_caches[cache_id_to_remove]
            return cached_flow_datastore

    def _get_task_datastore(
        self, ds_type, ds_root, flow_name, run_id, step_name, task_id, attempt
    ):
        """Return the task datastore for the given task attempt."""
        flow_ds = self._get_flow_datastore(ds_type, ds_root, flow_name)

        return flow_ds.get_task_datastore(run_id, step_name, task_id, attempt=attempt)
class TaskMetadataCache(MetadataCache):
    """
    MetadataCache that stores task metadata as JSON files through a FileCache.

    One instance is bound to a (datastore type, datastore root, flow name)
    triple; individual entries are keyed by run, step, task and attempt.
    """

    def __init__(self, filecache, ds_type, ds_root, flow_name):
        self._filecache = filecache
        self._ds_type = ds_type
        self._ds_root = ds_root
        self._flow_name = flow_name

    def _path(self, run_id, step_name, task_id, attempt):
        """Return the cache file path for the metadata of one task attempt."""
        if attempt is None:
            raise MetaflowException(
                "Attempt number must be specified to use task metadata cache. Raise an issue "
                "on Metaflow GitHub if you see this message.",
            )
        cache_id = self._filecache.task_ds_id(
            self._ds_type,
            self._ds_root,
            self._flow_name,
            run_id,
            step_name,
            task_id,
            attempt,
        )
        # The file name is the SHA1 of the joined identifiers plus ".cached".
        hashed_key = os.path.join(run_id, step_name, task_id, str(attempt), "metadata")
        token = "%s.cached" % sha1(hashed_key.encode("utf-8")).hexdigest()
        return os.path.join(self._filecache.cache_dir, cache_id, token[:2], token)

    def load_metadata(self, run_id, step_name, task_id, attempt):
        """Return the cached metadata (decoded JSON), or None when nothing is cached."""
        raw = self._filecache.read_file(self._path(run_id, step_name, task_id, attempt))
        return json.loads(raw) if raw else None

    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
        """Serialize `metadata_dict` as UTF-8 JSON and write it to the cache."""
        target = self._path(run_id, step_name, task_id, attempt)
        payload = json.dumps(metadata_dict).encode("utf-8")
        self._filecache.create_file(target, payload)
class FileBlobCache(BlobCache):
    """BlobCache that keeps blobs as `<key>.blob` files inside a FileCache."""

    def __init__(self, filecache, cache_id):
        self._filecache = filecache
        self._cache_id = cache_id

    def _path(self, key):
        """Return the file path for `key`; files are grouped by the key's first two characters."""
        prefix_dir = key[:2]
        base_dir = os.path.join(self._filecache.cache_dir, self._cache_id, prefix_dir)
        return os.path.join(base_dir, "%s.blob" % key)

    def load_key(self, key):
        """Return the cached bytes for `key`, or whatever `read_file` returns on a miss."""
        blob_path = self._path(key)
        return self._filecache.read_file(blob_path)

    def store_key(self, key, blob):
        """Write `blob` to the cache file for `key`."""
        blob_path = self._path(key)
        self._filecache.create_file(blob_path, blob)
================================================
FILE: metaflow/clone_util.py
================================================
import time
from .metadata_provider import MetaDatum
def clone_task_helper(
    flow_name,
    clone_run_id,
    run_id,
    step_name,
    clone_task_id,
    task_id,
    flow_datastore,
    metadata_service,
    origin_ds_set=None,
    attempt_id=0,
):
    """
    Clone the task `clone_run_id/step_name/clone_task_id` into
    `run_id/step_name/task_id`.

    The output task datastore is initialized, populated from the origin
    datastore via `clone`, the task id and bookkeeping metadata (origin ids,
    attempt number, attempt status) are registered with `metadata_service`,
    and finally the output datastore is marked done.

    The origin datastore is taken from `origin_ds_set` when one is given and
    is otherwise fetched from `flow_datastore`. `flow_name` is not used here.
    """
    # 1. initialize output datastore
    output = flow_datastore.get_task_datastore(
        run_id, step_name, task_id, attempt=attempt_id, mode="w"
    )
    output.init_task()

    # 2. initialize origin datastore
    if origin_ds_set:
        origin_pathspec = "{}/{}/{}".format(clone_run_id, step_name, clone_task_id)
        origin = origin_ds_set.get_with_pathspec(origin_pathspec)
    else:
        origin = flow_datastore.get_task_datastore(
            clone_run_id, step_name, clone_task_id
        )

    tags = ["attempt_id:{0}".format(attempt_id)]
    output.clone(origin)

    metadata_service.register_task_id(run_id, step_name, task_id, attempt_id)

    # (field, value, type) for every metadata entry recorded on the clone.
    entries = (
        ("origin-task-id", str(clone_task_id), "origin-task-id"),
        ("origin-run-id", str(clone_run_id), "origin-run-id"),
        ("attempt", str(attempt_id), "attempt"),
        # During clone, the task is always considered successful.
        ("attempt_ok", "True", "internal_attempt_status"),
    )
    metadata_service.register_metadata(
        run_id,
        step_name,
        task_id,
        [
            MetaDatum(field=field, value=value, type=kind, tags=tags)
            for field, value, kind in entries
        ],
    )
    output.done()
================================================
FILE: metaflow/cmd/__init__.py
================================================
================================================
FILE: metaflow/cmd/code/__init__.py
================================================
import os
import shutil
import sys
from subprocess import PIPE, CompletedProcess, run
from tempfile import TemporaryDirectory
from typing import Any, Callable, List, Mapping, Optional, cast
from metaflow import Run
from metaflow.util import walk_without_cycles
from metaflow._vendor import click
from metaflow.cli import echo_always
@click.group()
def cli():
    # Root command group of this module; the `code` group below attaches to it.
    pass
@cli.group(help="Access, compare, and manage code associated with Metaflow runs.")
def code():
    # Container group for the diff, diff_runs, pull and patch functions that
    # are registered with @code.command() further down in this module.
    pass
def echo(line: str) -> None:
    """Print `line` through `echo_always` with `err=True` and a magenta foreground."""
    style = {"err": True, "fg": "magenta"}
    echo_always(line, **style)
def extract_code_package(runspec: str) -> TemporaryDirectory:
    """
    Extract the code package of the run identified by `runspec`.

    Returns whatever `Run.code.extract()` returns (annotated as a
    TemporaryDirectory).

    Raises
    ------
    Exception
        Any error raised while looking up the run is reported and re-raised.
    RuntimeError
        If the run has no code package.
    """
    try:
        mf_run = Run(runspec, _namespace_check=False)
        echo(f"✅ Run *{runspec}* found, downloading code..")
    except Exception as err:
        echo(f"❌ Run **{runspec}** not found")
        raise err

    if mf_run.code is not None:
        return mf_run.code.extract()

    echo(
        f"❌ Run **{runspec}** doesn't have a code package. Maybe it's a local run?"
    )
    raise RuntimeError("no code package found")
def perform_diff(
    source_dir: str,
    target_dir: Optional[str] = None,
    output: bool = False,
    **kwargs: Mapping[str, Any],
) -> Optional[List[str]]:
    """
    Run `git diff --no-index` for every file under `source_dir` against the
    file with the same relative path under `target_dir`.

    Parameters
    ----------
    source_dir : str
        Directory holding the extracted code of a run.
    target_dir : str, optional
        Directory to compare against; defaults to the current working directory.
    output : bool
        When True, diffs are collected and returned instead of being paged
        through `less`, and the per-file status messages are suppressed.
    **kwargs
        Accepted but not used.

    Returns
    -------
    Optional[List[str]]
        The list of diff texts when `output` is True, otherwise None.
    """
    if target_dir is None:
        target_dir = os.getcwd()

    collected = []
    for dirpath, _, filenames in walk_without_cycles(source_dir):
        for fname in filenames:
            # NOTE: the paths below need to be set up carefully
            # for the `patch` command to work. Better not to touch
            # the directories below. If you must, test that patches
            # work after your changes.
            #
            # target_file is the git repo in the current working directory
            rel = os.path.relpath(dirpath, source_dir)
            target_file = os.path.join(rel, fname)
            # source_file is the run file loaded in a tmp directory
            source_file = os.path.join(dirpath, fname)

            use_color = sys.stdout.isatty() and not output
            color_flag = "--color" if use_color else "--no-color"

            if not os.path.exists(os.path.join(target_dir, target_file)):
                if not output:
                    echo(f"❗ {target_file} not in the target directory, skipping")
                continue

            cmd = [
                "git",
                "diff",
                "--no-index",
                "--exit-code",
                color_flag,
                target_file,
                source_file,
            ]
            result: CompletedProcess = run(cmd, text=True, stdout=PIPE, cwd=target_dir)
            if result.returncode == 0:
                if not output:
                    echo(f"✅ {target_file} is identical, skipping")
                continue

            if output:
                collected.append(result.stdout)
            else:
                run(["less", "-R"], input=result.stdout, text=True)

    return collected if output else None
def run_op(
    runspec: str, op: Callable[..., Optional[List[str]]], **op_args: Mapping[str, Any]
) -> Optional[List[str]]:
    """
    Extract the code package of `runspec`, call `op(<extracted dir>, **op_args)`
    and return its result. The extracted directory is removed afterwards if it
    still exists, whether or not `op` raised.
    """
    extracted = None
    try:
        extracted = extract_code_package(runspec)
        result = op(extracted.name, **op_args)
    finally:
        if extracted and os.path.exists(extracted.name):
            shutil.rmtree(extracted.name)
    return result
def run_op_diff_runs(
    source_run_pathspec: str, target_run_pathspec: str, **op_args: Mapping[str, Any]
) -> Optional[List[str]]:
    """
    Extract the code packages of two runs and diff them with `perform_diff`
    (source first, target second). Both extracted directories are removed
    afterwards if they still exist.
    """
    source_tmp = None
    target_tmp = None
    try:
        source_tmp = extract_code_package(source_run_pathspec)
        target_tmp = extract_code_package(target_run_pathspec)
        result = perform_diff(source_tmp.name, target_tmp.name, **op_args)
    finally:
        for extracted in (source_tmp, target_tmp):
            if not extracted:
                continue
            if os.path.exists(extracted.name):
                shutil.rmtree(extracted.name)
    return result
def op_diff(tmpdir: str, **kwargs: Mapping[str, Any]) -> Optional[List[str]]:
    """
    Adapter that calls `perform_diff` on `tmpdir`.

    `target_dir` and `output` are picked out of `kwargs` (defaulting to None
    and False); every other keyword argument is forwarded unchanged.
    """
    forwarded = {
        key: value
        for key, value in kwargs.items()
        if key not in ("target_dir", "output")
    }
    target_dir = cast(Optional[str], kwargs.get("target_dir", None))
    want_output: bool = bool(kwargs.get("output", False))
    return perform_diff(
        tmpdir, target_dir=target_dir, output=want_output, **forwarded
    )
def op_pull(tmpdir: str, dst: str, **op_args: Mapping[str, Any]) -> None:
    """Move the extracted code in `tmpdir` to `dst` unless `dst` already exists."""
    if not os.path.exists(dst):
        shutil.move(tmpdir, dst)
        echo(f"Code downloaded to *{dst}*")
        return
    echo(f"❌ Directory *{dst}* already exists")
def op_patch(tmpdir: str, dst: str, **kwargs: Mapping[str, Any]) -> None:
    """
    Write the diffs between the current directory and the code in `tmpdir`
    to the patch file `dst`, then print the `git apply` command to use.

    Before writing, occurrences of `tmpdir` and the leading "./" after the
    a/ and b/ prefixes of the diff headers are rewritten.
    """
    # Applied in this exact order to every diff chunk.
    rewrites = (
        (tmpdir, "/."),
        ("+++ b/./", "+++ b/"),
        ("--- b/./", "--- b/"),
        ("--- a/./", "--- a/"),
        ("+++ a/./", "+++ a/"),
    )
    chunks = perform_diff(tmpdir, output=True) or []
    with open(dst, "w", encoding="utf-8") as patch_file:
        for chunk in chunks:
            for old, new in rewrites:
                chunk = chunk.replace(old, new)
            patch_file.write(chunk)
    echo(f"Patch saved in *{dst}*")

    # Path of the current directory relative to the repository root, if any.
    git_prefix = run(
        ["git", "rev-parse", "--show-prefix"], text=True, stdout=PIPE
    ).stdout.strip()
    diropt = f" --directory={git_prefix.rstrip('/')}" if git_prefix else ""

    echo("Apply the patch by running:")
    echo_always(
        f"git apply --verbose{diropt} {dst}", highlight=True, bold=True, err=True
    )
@code.command()
@click.argument("run_pathspec")
def diff(run_pathspec: str, **kwargs: Mapping[str, Any]) -> None:
    """
    Do a 'git diff' of the current directory and a Metaflow run.
    """
    # The value returned by run_op is intentionally discarded.
    run_op(run_pathspec, op_diff, **kwargs)
@code.command()
@click.argument("source_run_pathspec")
@click.argument("target_run_pathspec")
def diff_runs(
    source_run_pathspec: str, target_run_pathspec: str, **kwargs: Mapping[str, Any]
) -> None:
    """
    Do a 'git diff' between two Metaflow runs.
    """
    # The value returned by run_op_diff_runs is intentionally discarded.
    run_op_diff_runs(source_run_pathspec, target_run_pathspec, **kwargs)
@code.command()
@click.argument("run_pathspec")
@click.option(
    "--dir", help="Destination directory (default: {run_pathspec}_code)", default=None
)
def pull(
    run_pathspec: str, dir: Optional[str] = None, **kwargs: Mapping[str, Any]
) -> None:
    """
    Pull the code of a Metaflow run.
    """
    # Default destination: lower-cased pathspec with "/" replaced by "_".
    destination = dir
    if destination is None:
        destination = run_pathspec.lower().replace("/", "_") + "_code"
    run_op(run_pathspec, op_pull, **dict(kwargs, dst=destination))
@code.command()
@click.argument("run_pathspec")
@click.option(
    "--file_path",
    help="Patch file name. If not provided, defaults to a sanitized version of RUN_PATHSPEC "
    "with slashes replaced by underscores, plus '.patch'.",
    show_default=False,
)
@click.option(
    "--overwrite", is_flag=True, help="Overwrite the patch file if it exists."
)
def patch(
    run_pathspec: str,
    file_path: Optional[str] = None,
    overwrite: bool = False,
    **kwargs: Mapping[str, Any],
) -> None:
    """
    Create a patch by comparing current dir with a Metaflow run.
    """
    patch_path = file_path
    if patch_path is None:
        patch_path = run_pathspec.lower().replace("/", "_") + ".patch"

    # Refuse to clobber an existing patch file unless --overwrite was given.
    if os.path.exists(patch_path) and not overwrite:
        echo(f"File *{patch_path}* already exists. To overwrite, specify --overwrite.")
    else:
        run_op(run_pathspec, op_patch, **dict(kwargs, dst=patch_path))
================================================
FILE: metaflow/cmd/configure_cmd.py
================================================
import json
import os
import sys
from os.path import expanduser
from metaflow.util import to_unicode
from metaflow._vendor import click
from metaflow.util import to_unicode
from .util import echo_always, makedirs
# Module-local alias: every message in this file is printed via echo_always.
echo = echo_always
# NOTE: This code needs to be in sync with metaflow/metaflow_config.py.
# Directory holding the configuration JSON files: the METAFLOW_HOME
# environment variable if set, otherwise ~/.metaflowconfig (with ~ expanded).
METAFLOW_CONFIGURATION_DIR = expanduser(
    os.environ.get("METAFLOW_HOME", "~/.metaflowconfig")
)
# Default for the --profile options below, taken from the METAFLOW_PROFILE
# environment variable; the empty string selects config.json (see get_config_path).
METAFLOW_PROFILE = os.environ.get("METAFLOW_PROFILE", "")
@click.group()
def cli():
    # Root command group of this module; the `configure` group below attaches to it.
    pass
@cli.group(help="Configure Metaflow to access the cloud.")
def configure():
    # Group callback: create the configuration directory via makedirs so the
    # subcommands can read and write profile files in it.
    makedirs(METAFLOW_CONFIGURATION_DIR)
def get_config_path(profile):
    """
    Return the path of the configuration file for `profile`.

    A falsy profile maps to `config.json`, a named one to
    `config_<profile>.json`, both inside METAFLOW_CONFIGURATION_DIR.
    """
    if profile:
        config_file = "config_%s.json" % profile
    else:
        config_file = "config.json"
    return os.path.join(METAFLOW_CONFIGURATION_DIR, config_file)
def confirm_overwrite_config(profile):
    """
    Ask for confirmation before modifying an existing configuration.

    Returns True when no configuration file exists for `profile` or the user
    agrees to modify it. Returns False, after printing a hint about named
    profiles, when the user declines.
    """
    path = get_config_path(profile)
    if not os.path.exists(path):
        return True

    question = click.style(
        "We found an existing configuration for your "
        "profile. Do you want to modify the existing "
        "configuration?",
        fg="red",
        bold=True,
    )
    if click.confirm(question):
        return True

    echo(
        "You can configure a different named profile by using the "
        "--profile argument. You can activate this profile by setting "
        "the environment variable METAFLOW_PROFILE to the named "
        "profile.",
        fg="yellow",
    )
    return False
def check_for_missing_profile(profile):
    """
    Raise a click.ClickException when a named `profile` has no configuration file.

    Nothing is checked for the default (falsy) profile.
    """
    path = get_config_path(profile)
    # Absence of default config is equivalent to running locally.
    if not profile or os.path.exists(path):
        return
    message = "".join(
        [
            "Couldn't find configuration for profile ",
            click.style('"%s"' % profile, fg="red"),
            " in ",
            click.style('"%s"' % path, fg="red"),
        ]
    )
    raise click.ClickException(message)
def get_env(profile):
    """Return the parsed JSON configuration of `profile`, or {} if its file does not exist."""
    path = get_config_path(profile)
    if not os.path.exists(path):
        return {}
    with open(path) as config_file:
        return json.load(config_file)
def persist_env(env_dict, profile):
    """Write `env_dict` as indented, key-sorted JSON to the config file of `profile` and report the path."""
    # TODO: Should we persist empty env_dict or notify user differently?
    destination = get_config_path(profile)
    with open(destination, "w") as config_file:
        json.dump(env_dict, config_file, indent=4, sort_keys=True)

    echo("\nConfiguration successfully written to ", nl=False, bold=True)
    quoted_destination = '"%s"' % destination
    echo(quoted_destination, fg="cyan")
@configure.command(help="Reset configuration to disable cloud access.")
@click.option(
    "--profile", "-p", default=METAFLOW_PROFILE, help="Optional named profile."
)
def reset(profile):
    # Delete the profile's configuration file after confirmation; declining
    # the prompt aborts the command (abort=True).
    check_for_missing_profile(profile)
    path = get_config_path(profile)
    if not os.path.exists(path):
        echo("Configuration is already reset to run locally.")
        return

    question = "Do you really wish to reset the configuration in " + click.style(
        '"%s"' % path, fg="cyan"
    )
    if click.confirm(question, abort=True):
        os.remove(path)
        echo("Configuration successfully reset to run locally.")
@configure.command(help="Show existing configuration.")
@click.option(
    "--profile", "-p", default=METAFLOW_PROFILE, help="Optional named profile."
)
def show(profile):
    # Print every KEY=VALUE pair of the profile's configuration, or a note
    # that Metaflow runs locally when the configuration is missing or empty.
    check_for_missing_profile(profile)
    path = get_config_path(profile)

    settings = {}
    if os.path.exists(path):
        with open(path, "r") as config_file:
            settings = json.load(config_file)

    if not settings:
        echo("Configuration is set to run locally.")
        return

    echo("Showing configuration in ", nl=False)
    echo('"%s"\n' % path, fg="cyan")
    for key, value in settings.items():
        echo("%s=%s" % (key, value))
@configure.command(help="Export configuration to a file.")
@click.option(
    "--profile",
    "-p",
    default=METAFLOW_PROFILE,
    help="Optional named profile whose configuration must be exported.",
)
@click.argument("output_filename", type=click.Path(resolve_path=True))
def export(profile, output_filename):
    # Copy the profile's configuration (or {} when it has none) to
    # output_filename as indented, key-sorted JSON.
    check_for_missing_profile(profile)

    # Export its contents to a new file.
    source_path = get_config_path(profile)
    settings = {}
    if os.path.exists(source_path):
        with open(source_path, "r") as config_file:
            settings = json.load(config_file)

    # resolve_path doesn't expand `~` in `path`.
    output_path = expanduser(output_filename)
    if os.path.exists(output_path):
        # abort=True makes click abort the command when the user declines,
        # so the return value does not need to be inspected.
        click.confirm(
            "Do you wish to overwrite the contents in "
            + click.style('"%s"' % output_path, fg="cyan")
            + "?",
            abort=True,
        )

    # Write to file.
    with open(output_path, "w") as out_file:
        json.dump(settings, out_file, indent=4, sort_keys=True)
    echo("Configuration successfully exported to: ", nl=False)
    echo('"%s"' % output_path, fg="cyan")
@configure.command(help="Import configuration from a file.", name="import")
@click.option(
    "--profile",
    "-p",
    default=METAFLOW_PROFILE,
    help="Optional named profile to which the configuration must be " "imported into.",
)
@click.argument("input_filename", type=click.Path(exists=True, resolve_path=True))
def import_from(profile, input_filename):
    # Read a JSON configuration from input_filename and persist it as the
    # configuration of `profile`.
    check_for_missing_profile(profile)
    # Import configuration.
    input_path = expanduser(input_filename)
    env_dict = {}
    with open(input_path, "r") as f:
        env_dict = json.load(f)
    echo("Configuration successfully read from: ", nl=False)
    echo('"%s"' % input_path, fg="cyan")

    # Persist configuration. Honor the user's answer: previously the result
    # of the confirmation was ignored and the existing configuration was
    # overwritten even when the user declined.
    if not confirm_overwrite_config(profile):
        return
    persist_env(env_dict, profile)
@configure.command(help="Configure metaflow to access hosted sandbox.")
@click.option(
    "--profile",
    "-p",
    default="",
    help="Configure a named profile. Activate the profile by setting "
    "`METAFLOW_PROFILE` environment variable.",
)
@click.option(
    "--overwrite/--no-overwrite",
    "-o/",
    default=False,
    show_default=True,
    help="Overwrite profile configuration without asking",
)
def sandbox(profile, overwrite):
    # Prompt for the encoded sandbox string, decode it (base64 -> zlib ->
    # JSON) and persist the result as the configuration of `profile`.
    #
    # Honor the user's answer: previously the result of the confirmation was
    # ignored and an existing configuration was overwritten regardless.
    if not overwrite and not confirm_overwrite_config(profile):
        return
    # Prompt for user input.
    encoded_str = click.prompt(
        "Following instructions from "
        "https://metaflow.org/sandbox, "
        "please paste the encoded magic string",
        type=str,
    )
    # Decode the bytes to env_dict.
    import base64
    import zlib

    from metaflow.util import to_bytes

    try:
        env_dict = json.loads(
            to_unicode(zlib.decompress(base64.b64decode(to_bytes(encoded_str))))
        )
    except Exception:
        # Only decoding failures are reported as a bad argument; a bare
        # `except:` would also swallow KeyboardInterrupt and SystemExit.
        # TODO: Add the URL for contact us page in the error?
        raise click.BadArgumentUsage(
            "Could not decode the sandbox " "configuration. Please contact us."
        )
    # Persist to a file.
    persist_env(env_dict, profile)
def cyan(string):
    """Return `string` styled by click with a cyan foreground."""
    return click.style(string, fg="cyan")
def yellow(string):
    """Return `string` styled by click with a yellow foreground."""
    return click.style(string, fg="yellow")
def red(string):
    """Return `string` styled by click with a red foreground."""
    return click.style(string, fg="red")
def configure_s3_datastore(existing_env):
    """
    Prompt for the Amazon S3 datastore settings.

    Parameters
    ----------
    existing_env : dict
        Current configuration; its values are offered as prompt defaults.

    Returns
    -------
    dict
        New settings: METAFLOW_DEFAULT_DATASTORE ("s3"),
        METAFLOW_DATASTORE_SYSROOT_S3 and METAFLOW_DATATOOLS_S3ROOT.
    """
    # S3 locations are URLs and must always be joined with "/"; os.path.join
    # would use the platform separator (a backslash on Windows).
    import posixpath

    env = {}
    # Set Amazon S3 as default datastore.
    env["METAFLOW_DEFAULT_DATASTORE"] = "s3"
    # Set Amazon S3 folder for datastore.
    env["METAFLOW_DATASTORE_SYSROOT_S3"] = click.prompt(
        cyan("[METAFLOW_DATASTORE_SYSROOT_S3]")
        + " Amazon S3 folder for Metaflow artifact storage "
        + "(s3:///).",
        default=existing_env.get("METAFLOW_DATASTORE_SYSROOT_S3"),
        show_default=True,
    )
    # Set Amazon S3 folder for datatools. When not configured yet, default to
    # a "data" folder below the datastore root chosen above.
    env["METAFLOW_DATATOOLS_S3ROOT"] = click.prompt(
        cyan("[METAFLOW_DATATOOLS_S3ROOT]")
        + yellow(" (optional)")
        + " Amazon S3 folder for Metaflow datatools "
        + "(s3:///).",
        default=existing_env.get(
            "METAFLOW_DATATOOLS_S3ROOT",
            posixpath.join(env["METAFLOW_DATASTORE_SYSROOT_S3"], "data"),
        ),
        show_default=True,
    )
    return env
def configure_azure_datastore(existing_env):
    """
    Prompt for the Azure Blob Storage datastore settings.

    Values already present in `existing_env` are offered as prompt defaults.
    Returns a new dict with METAFLOW_DEFAULT_DATASTORE ("azure"),
    METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT and
    METAFLOW_DATASTORE_SYSROOT_AZURE, in that order.
    """
    # Set Azure Blob Storage folder for datastore.
    # TODO rename this Blob Endpoint!
    endpoint_prompt = (
        cyan("[METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT]")
        + " Azure Storage Account URL, for the account holding the Blob container to be used. "
        + "(E.g. https://.blob.core.windows.net/)"
    )
    blob_endpoint = click.prompt(
        endpoint_prompt,
        default=existing_env.get("METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT"),
        show_default=True,
    )

    sysroot_prompt = (
        cyan("[METAFLOW_DATASTORE_SYSROOT_AZURE]")
        + " Azure Blob Storage folder for Metaflow artifact storage "
        + "(Format: /)"
    )
    sysroot = click.prompt(
        sysroot_prompt,
        default=existing_env.get("METAFLOW_DATASTORE_SYSROOT_AZURE"),
        show_default=True,
    )

    return {
        # Set Azure Blob Storage as default datastore.
        "METAFLOW_DEFAULT_DATASTORE": "azure",
        "METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT": blob_endpoint,
        "METAFLOW_DATASTORE_SYSROOT_AZURE": sysroot,
    }
def configure_gs_datastore(existing_env):
    """
    Interactively collect the Google Cloud Storage datastore settings.

    Parameters
    ----------
    existing_env : dict-like
        Previously saved settings; looked up with `.get` to pre-fill the
        prompt default.

    Returns
    -------
    dict
        The default datastore marker ("gs") and the prompted datastore root.
    """
    # Google Cloud Storage becomes the default datastore.
    settings = {"METAFLOW_DEFAULT_DATASTORE": "gs"}

    # Google Cloud Storage folder used for the datastore.
    sysroot_message = (
        cyan("[METAFLOW_DATASTORE_SYSROOT_GS]")
        + " Google Cloud Storage folder for Metaflow artifact storage "
        + "(Format: gs:///)"
    )
    settings["METAFLOW_DATASTORE_SYSROOT_GS"] = click.prompt(
        sysroot_message,
        default=existing_env.get("METAFLOW_DATASTORE_SYSROOT_GS"),
        show_default=True,
    )
    return settings
def configure_metadata_service(existing_env):
    """
    Interactively collect the settings for the remote Metadata Service.

    Parameters
    ----------
    existing_env : dict-like
        Previously saved settings; looked up with `.get` to pre-fill the
        prompt defaults.

    Returns
    -------
    dict
        The default metadata marker ("service") plus the service URL, the
        internal URL and the auth key entered by the user.
    """
    # NOTE: the original computed an `empty_profile` flag here that was never
    # read; it has been dropped.
    env = {}
    # Set Metadata Service as default.
    env["METAFLOW_DEFAULT_METADATA"] = "service"
    # Set URL for the Metadata Service.
    env["METAFLOW_SERVICE_URL"] = click.prompt(
        cyan("[METAFLOW_SERVICE_URL]") + " URL for Metaflow Service.",
        default=existing_env.get("METAFLOW_SERVICE_URL"),
        show_default=True,
    )
    # Set internal URL for the Metadata Service. When no internal URL was
    # saved before, the URL entered just above is offered as the default.
    env["METAFLOW_SERVICE_INTERNAL_URL"] = click.prompt(
        cyan("[METAFLOW_SERVICE_INTERNAL_URL]")
        + yellow(" (optional)")
        + " URL for Metaflow Service "
        + "(Accessible only within VPC [AWS] or a Kubernetes cluster [if the service runs in one]).",
        default=existing_env.get(
            "METAFLOW_SERVICE_INTERNAL_URL", env["METAFLOW_SERVICE_URL"]
        ),
        show_default=True,
    )
    # Set Auth Key for the Metadata Service.
    env["METAFLOW_SERVICE_AUTH_KEY"] = click.prompt(
        cyan("[METAFLOW_SERVICE_AUTH_KEY]")
        + yellow(" (optional)")
        + " Auth Key for Metaflow Service.",
        default=existing_env.get("METAFLOW_SERVICE_AUTH_KEY", ""),
        show_default=True,
    )
    return env
def configure_azure_datastore_and_metadata(existing_env):
    """
    Ask whether to configure Azure Blob Storage and the Metadata Service, and
    run the corresponding prompt sequences for each "yes".

    Parameters
    ----------
    existing_env : dict-like
        Previously saved settings. Both questions default to "yes" when this
        is empty, or when it already selects the "azure" datastore / the
        "service" metadata provider respectively.

    Returns
    -------
    dict
        Settings gathered from the sub-configurations that were accepted.
    """
    empty_profile = not existing_env
    collected = {}

    # Azure Blob Storage as the datastore.
    datastore_question = (
        "\nMetaflow can use "
        + yellow("Azure Blob Storage as the storage backend")
        + " for all code and data artifacts on "
        + "Azure.\nAzure Blob Storage is a strict requirement if you "
        + "intend to execute your flows on a Kubernetes cluster on Azure (AKS or self-managed)"
        + ".\nWould you like to configure Azure Blob Storage "
        + "as the default storage backend?"
    )
    datastore_default = (
        empty_profile
        or existing_env.get("METAFLOW_DEFAULT_DATASTORE", "") == "azure"
    )
    if click.confirm(datastore_question, default=datastore_default, abort=False):
        collected.update(configure_azure_datastore(existing_env))

    # Metadata service for tracking.
    metadata_question = (
        "\nMetaflow can use a "
        + yellow("remote Metadata Service to track")
        + " and persist flow execution metadata.\nConfiguring the "
        "service is a requirement if you intend to schedule your "
        "flows with Kubernetes on Azure (AKS or self-managed).\nWould you like to "
        "configure the Metadata Service?"
    )
    metadata_default = (
        empty_profile
        or existing_env.get("METAFLOW_DEFAULT_METADATA", "") == "service"
    )
    if click.confirm(metadata_question, default=metadata_default, abort=False):
        collected.update(configure_metadata_service(existing_env))
    return collected
def configure_gs_datastore_and_metadata(existing_env):
    """
    Ask whether to configure Google Cloud Storage and the Metadata Service,
    and run the corresponding prompt sequences for each "yes".

    Parameters
    ----------
    existing_env : dict-like
        Previously saved settings. Both questions default to "yes" when this
        is empty, or when it already selects the "gs" datastore / the
        "service" metadata provider respectively.

    Returns
    -------
    dict
        Settings gathered from the sub-configurations that were accepted.
    """
    empty_profile = not existing_env
    collected = {}

    # Google Cloud Storage as the datastore.
    datastore_question = (
        "\nMetaflow can use "
        + yellow("Google Cloud Storage as the storage backend")
        + " for all code and data artifacts on "
        + "Google Cloud Storage.\nGoogle Cloud Storage is a strict requirement if you "
        + "intend to execute your flows on a Kubernetes cluster on GCP (GKE or self-managed)"
        + ".\nWould you like to configure Google Cloud Storage "
        + "as the default storage backend?"
    )
    datastore_default = (
        empty_profile or existing_env.get("METAFLOW_DEFAULT_DATASTORE", "") == "gs"
    )
    if click.confirm(datastore_question, default=datastore_default, abort=False):
        collected.update(configure_gs_datastore(existing_env))

    # Metadata service for tracking.
    metadata_question = (
        "\nMetaflow can use a "
        + yellow("remote Metadata Service to track")
        + " and persist flow execution metadata.\nConfiguring the "
        "service is a requirement if you intend to schedule your "
        "flows with Kubernetes on GCP (GKE or self-managed).\nWould you like to "
        "configure the Metadata Service?"
    )
    metadata_default = (
        empty_profile
        or existing_env.get("METAFLOW_DEFAULT_METADATA", "") == "service"
    )
    if click.confirm(metadata_question, default=metadata_default, abort=False):
        collected.update(configure_metadata_service(existing_env))
    return collected
def configure_aws_datastore_and_metadata(existing_env):
    """
    Ask whether to configure Amazon S3 and the Metadata Service, and run the
    corresponding prompt sequences for each "yes".

    Parameters
    ----------
    existing_env : dict-like
        Previously saved settings. The datastore question defaults to "yes"
        when this is empty or already selects "s3". The metadata question
        defaults to "yes" when this is empty, already selects the "service"
        metadata provider, or already contains a Step Functions IAM role.

    Returns
    -------
    dict
        Settings gathered from the sub-configurations that were accepted.
    """
    empty_profile = not existing_env
    env = {}
    # Configure Amazon S3 as the datastore.
    use_s3_as_datastore = click.confirm(
        "\nMetaflow can use "
        + yellow("Amazon S3 as the storage backend")
        + " for all code and data artifacts on "
        + "AWS.\nAmazon S3 is a strict requirement if you "
        + "intend to execute your flows on AWS Batch "
        + "and/or schedule them on AWS Step "
        + "Functions.\nWould you like to configure Amazon "
        + "S3 as the default storage backend?",
        default=empty_profile
        or existing_env.get("METAFLOW_DEFAULT_DATASTORE", "") == "s3",
        abort=False,
    )
    if use_s3_as_datastore:
        env.update(configure_s3_datastore(existing_env))
    # Configure Metadata service for tracking.
    # The Step Functions role is looked up in `existing_env`: the freshly
    # built `env` can only contain S3 datastore keys at this point, so the
    # original `in env` test could never be true.
    if click.confirm(
        "\nMetaflow can use a "
        + yellow("remote Metadata Service to track")
        + " and persist flow execution metadata.\nConfiguring the "
        "service is a requirement if you intend to schedule your "
        "flows with AWS Step Functions.\nWould you like to "
        "configure the Metadata Service?",
        default=empty_profile
        or existing_env.get("METAFLOW_DEFAULT_METADATA", "") == "service"
        or "METAFLOW_SFN_IAM_ROLE" in existing_env,
        abort=False,
    ):
        env.update(configure_metadata_service(existing_env))
    return env
def configure_aws_batch(existing_env):
    """
    Interactively collect the AWS Batch settings and, if the user opts in,
    the AWS Step Functions scheduling settings.

    Parameters
    ----------
    existing_env : dict-like
        Previously saved settings; used to pre-fill prompt defaults. The
        Step Functions question defaults to "yes" when this is empty or
        already contains "METAFLOW_SFN_IAM_ROLE".

    Returns
    -------
    dict
        The values entered by the user, keyed by configuration variable.
    """
    empty_profile = not existing_env
    settings = {}

    # AWS Batch Job Queue.
    queue_message = cyan("[METAFLOW_BATCH_JOB_QUEUE]") + " AWS Batch Job Queue."
    settings["METAFLOW_BATCH_JOB_QUEUE"] = click.prompt(
        queue_message,
        default=existing_env.get("METAFLOW_BATCH_JOB_QUEUE"),
        show_default=True,
    )

    # IAM role for AWS Batch jobs to assume.
    role_message = (
        cyan("[METAFLOW_ECS_S3_ACCESS_IAM_ROLE]")
        + " IAM role for AWS Batch jobs to access AWS "
        + "resources (Amazon S3 etc.)."
    )
    settings["METAFLOW_ECS_S3_ACCESS_IAM_ROLE"] = click.prompt(
        role_message,
        default=existing_env.get("METAFLOW_ECS_S3_ACCESS_IAM_ROLE"),
        show_default=True,
    )

    # Default Docker repository for AWS Batch jobs.
    registry_message = (
        cyan("[METAFLOW_BATCH_CONTAINER_REGISTRY]")
        + yellow(" (optional)")
        + " Default Docker image repository for AWS "
        + "Batch jobs. If nothing is specified, "
        + "dockerhub (hub.docker.com/) is "
        + "used as default."
    )
    settings["METAFLOW_BATCH_CONTAINER_REGISTRY"] = click.prompt(
        registry_message,
        default=existing_env.get("METAFLOW_BATCH_CONTAINER_REGISTRY", ""),
        show_default=True,
    )

    # Default Docker image for AWS Batch jobs.
    image_message = (
        cyan("[METAFLOW_BATCH_CONTAINER_IMAGE]")
        + yellow(" (optional)")
        + " Default Docker image for AWS Batch jobs. "
        + "If nothing is specified, an appropriate "
        + "python image is used as default."
    )
    settings["METAFLOW_BATCH_CONTAINER_IMAGE"] = click.prompt(
        image_message,
        default=existing_env.get("METAFLOW_BATCH_CONTAINER_IMAGE", ""),
        show_default=True,
    )

    # AWS Step Functions for scheduling (optional).
    sfn_question = (
        "\nMetaflow can "
        + yellow("schedule your flows on AWS Step " "Functions")
        + " and trigger them at a specific cadence using "
        "Amazon EventBridge.\nTo support flows involving "
        "foreach steps, you would need access to AWS "
        "DynamoDB.\nWould you like to configure AWS Step "
        "Functions for scheduling?"
    )
    sfn_default = empty_profile or "METAFLOW_SFN_IAM_ROLE" in existing_env
    if not click.confirm(sfn_question, default=sfn_default, abort=False):
        return settings

    # IAM role for AWS Step Functions.
    sfn_role_message = (
        cyan("[METAFLOW_SFN_IAM_ROLE]")
        + " IAM role for AWS Step Functions to "
        + "access AWS resources (AWS Batch, "
        + "AWS DynamoDB)."
    )
    settings["METAFLOW_SFN_IAM_ROLE"] = click.prompt(
        sfn_role_message,
        default=existing_env.get("METAFLOW_SFN_IAM_ROLE"),
        show_default=True,
    )

    # IAM role for Amazon EventBridge.
    events_role_message = (
        cyan("[METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE]")
        + " IAM role for Amazon EventBridge to "
        + "access AWS Step Functions."
    )
    settings["METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE"] = click.prompt(
        events_role_message,
        default=existing_env.get("METAFLOW_EVENTS_SFN_ACCESS_IAM_ROLE"),
        show_default=True,
    )

    # AWS DynamoDB table for AWS Step Functions.
    dynamo_message = (
        cyan("[METAFLOW_SFN_DYNAMO_DB_TABLE]")
        + " AWS DynamoDB table name for tracking "
        + "AWS Step Functions execution metadata."
    )
    settings["METAFLOW_SFN_DYNAMO_DB_TABLE"] = click.prompt(
        dynamo_message,
        default=existing_env.get("METAFLOW_SFN_DYNAMO_DB_TABLE"),
        show_default=True,
    )
    return settings
def check_kubernetes_client(ctx):
    """
    Abort the command when the `kubernetes` Python package cannot be imported.

    Parameters
    ----------
    ctx : click context
        Its `abort()` method is called after printing install instructions.
    """
    try:
        import kubernetes
    except ImportError:
        install_hint = yellow("%s -m pip install kubernetes" % sys.executable)
        message = (
            "Could not import module 'Kubernetes'.\nInstall Kubernetes "
            + "Python package (https://pypi.org/project/kubernetes/) first.\n"
            "You can install the module by executing - \n"
            + install_hint
            + " \nor equivalent in your favorite Python package manager\n"
        )
        echo(message)
        ctx.abort()
def check_kubernetes_config(ctx):
    """
    Show the active kube-config context and ask the user to confirm it.

    If the Kubernetes client raises a `ConfigException` while listing
    contexts, the error is shown and the user is asked whether to continue
    anyway. Both confirmations are issued with `abort=True`.

    Parameters
    ----------
    ctx : click context
        Not used by this function.
    """
    from kubernetes import config

    try:
        _contexts, active_context = config.list_kube_config_contexts()
        valid_message = (
            "You have a valid kubernetes configuration. The current context is set to "
            + yellow(active_context["name"])
            + " "
            + "Proceed?"
        )
        click.confirm(valid_message, default=True, abort=True)
    except config.config_exception.ConfigException as exc:
        invalid_message = (
            "\nYou don't seem to have a valid Kubernetes configuration file. "
            + "The error from Kubernetes client library: "
            + red(str(exc))
            + "."
            + "To create a kubernetes configuration for EKS, you typically need to run "
            + yellow("aws eks update-kubeconfig --name ")
            + ". For further details, refer to AWS documentation at https://docs.aws.amazon.com/eks/latest/userguide/create-kubeconfig.html\n"
            "Do you want to proceed with configuring Metaflow for Kubernetes anyway?"
        )
        click.confirm(invalid_message, default=False, abort=True)
def configure_argo_events(existing_env):
    """
    Interactively collect the Argo Events settings.

    Parameters
    ----------
    existing_env : dict-like
        Previously saved settings; looked up with `.get` to pre-fill the
        prompt defaults.

    Returns
    -------
    dict
        Service account, event bus, event source, event name, webhook URL and
        internal webhook URL as entered by the user.
    """
    settings = {}

    # Service account.
    account_message = (
        cyan("[METAFLOW_ARGO_EVENTS_SERVICE_ACCOUNT]")
        + " Service Account for Argo Events. "
    )
    settings["METAFLOW_ARGO_EVENTS_SERVICE_ACCOUNT"] = click.prompt(
        account_message,
        default=existing_env.get("METAFLOW_ARGO_EVENTS_SERVICE_ACCOUNT", ""),
        show_default=True,
    )

    # Event bus.
    bus_message = (
        cyan("[METAFLOW_ARGO_EVENTS_EVENT_BUS]")
        + yellow(" (optional)")
        + " Event Bus for Argo Events."
    )
    settings["METAFLOW_ARGO_EVENTS_EVENT_BUS"] = click.prompt(
        bus_message,
        default=existing_env.get("METAFLOW_ARGO_EVENTS_EVENT_BUS", "default"),
        show_default=True,
    )

    # Event source.
    source_message = (
        cyan("[METAFLOW_ARGO_EVENTS_EVENT_SOURCE]") + " Event Source for Argo Events."
    )
    settings["METAFLOW_ARGO_EVENTS_EVENT_SOURCE"] = click.prompt(
        source_message,
        default=existing_env.get("METAFLOW_ARGO_EVENTS_EVENT_SOURCE", ""),
        show_default=True,
    )

    # Event name.
    event_message = cyan("[METAFLOW_ARGO_EVENTS_EVENT]") + " Event name for Argo Events."
    settings["METAFLOW_ARGO_EVENTS_EVENT"] = click.prompt(
        event_message,
        default=existing_env.get("METAFLOW_ARGO_EVENTS_EVENT", ""),
        show_default=True,
    )

    # Public webhook URL.
    webhook_message = (
        cyan("[METAFLOW_ARGO_EVENTS_WEBHOOK_URL]")
        + " Publicly accessible URL for Argo Events Webhook."
    )
    settings["METAFLOW_ARGO_EVENTS_WEBHOOK_URL"] = click.prompt(
        webhook_message,
        default=existing_env.get("METAFLOW_ARGO_EVENTS_WEBHOOK_URL", ""),
        show_default=True,
    )

    # Internal webhook URL; falls back to the public URL entered just above
    # when no internal URL was saved earlier.
    internal_message = (
        cyan("[METAFLOW_ARGO_EVENTS_INTERNAL_WEBHOOK_URL]")
        + yellow(" (optional)")
        + " URL for Argo Events Webhook "
        + "(Accessible only within a Kubernetes cluster)."
    )
    internal_default = existing_env.get(
        "METAFLOW_ARGO_EVENTS_INTERNAL_WEBHOOK_URL",
        settings["METAFLOW_ARGO_EVENTS_WEBHOOK_URL"],
    )
    settings["METAFLOW_ARGO_EVENTS_INTERNAL_WEBHOOK_URL"] = click.prompt(
        internal_message,
        default=internal_default,
        show_default=True,
    )
    return settings
def configure_kubernetes(existing_env):
    """
    Interactively collect the Kubernetes compute settings.

    Parameters
    ----------
    existing_env : dict-like
        Previously saved settings; looked up with `.get` to pre-fill the
        prompt defaults.

    Returns
    -------
    dict
        Namespace, service account, container registry, container image and
        secrets list as entered by the user.
    """
    # NOTE: the original computed an `empty_profile` flag here that was never
    # read; it has been dropped.
    env = {}
    # Set K8S Namespace. A previously saved namespace is offered as the
    # default (like every other prompt); "default" is used otherwise.
    env["METAFLOW_KUBERNETES_NAMESPACE"] = click.prompt(
        cyan("[METAFLOW_KUBERNETES_NAMESPACE]")
        + yellow(" (optional)")
        + " Kubernetes Namespace ",
        default=existing_env.get("METAFLOW_KUBERNETES_NAMESPACE", "default"),
        show_default=True,
    )
    # Set K8S SA. Same fallback rule as the namespace above.
    env["METAFLOW_KUBERNETES_SERVICE_ACCOUNT"] = click.prompt(
        cyan("[METAFLOW_KUBERNETES_SERVICE_ACCOUNT]")
        + yellow(" (optional)")
        + " Kubernetes Service Account ",
        default=existing_env.get("METAFLOW_KUBERNETES_SERVICE_ACCOUNT", "default"),
        show_default=True,
    )
    # Set default Docker repository for K8S jobs.
    env["METAFLOW_KUBERNETES_CONTAINER_REGISTRY"] = click.prompt(
        cyan("[METAFLOW_KUBERNETES_CONTAINER_REGISTRY]")
        + yellow(" (optional)")
        + " Default Docker image repository for K8S "
        + "jobs. If nothing is specified, "
        + "dockerhub (hub.docker.com/) is "
        + "used as default.",
        default=existing_env.get("METAFLOW_KUBERNETES_CONTAINER_REGISTRY", ""),
        show_default=True,
    )
    # Set default Docker image for K8S jobs.
    env["METAFLOW_KUBERNETES_CONTAINER_IMAGE"] = click.prompt(
        cyan("[METAFLOW_KUBERNETES_CONTAINER_IMAGE]")
        + yellow(" (optional)")
        + " Default Docker image for K8S jobs. "
        + "If nothing is specified, an appropriate "
        + "python image is used as default.",
        default=existing_env.get("METAFLOW_KUBERNETES_CONTAINER_IMAGE", ""),
        show_default=True,
    )
    # Set default Kubernetes secrets to source into pod envs
    env["METAFLOW_KUBERNETES_SECRETS"] = click.prompt(
        cyan("[METAFLOW_KUBERNETES_SECRETS]")
        + yellow(" (optional)")
        + " Comma-delimited list of secret names. Jobs will"
        " gain environment variables from these secrets. ",
        default=existing_env.get("METAFLOW_KUBERNETES_SECRETS", ""),
        show_default=True,
    )
    return env
def verify_aws_credentials(ctx):
    """
    Ask the user to confirm that AWS access credentials are configured on
    this computer; print a pointer to the AWS CLI guide and abort otherwise.

    Parameters
    ----------
    ctx : click context
        Its `abort()` method is called when the user answers "no".
    """
    credentials_ready = click.confirm(
        "\nMetaflow relies on "
        + yellow("AWS access credentials")
        + " present on your computer to access resources on AWS."
        "\nBefore proceeding further, please confirm that you "
        "have already configured these access credentials on "
        "this computer.",
        default=True,
    )
    if credentials_ready:
        return
    echo(
        "There are many ways to setup your AWS access credentials. You "
        "can get started by following this guide: ",
        nl=False,
        fg="yellow",
    )
    echo(
        "https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html",
        fg="cyan",
    )
    ctx.abort()
def verify_azure_credentials(ctx):
    """
    Ask the user to confirm that Azure access credentials are configured on
    this computer; print pointers to the Azure CLI docs and abort otherwise.

    Parameters
    ----------
    ctx : click context
        Its `abort()` method is called when the user answers "no".
    """
    credentials_ready = click.confirm(
        "\nMetaflow relies on "
        + yellow("Azure access credentials")
        + " present on your computer to access resources on Azure."
        "\nBefore proceeding further, please confirm that you "
        "have already configured these access credentials on "
        "this computer.",
        default=True,
    )
    if credentials_ready:
        return
    echo(
        "There are many ways to setup your Azure access credentials. You "
        "can get started by getting familiar with the following: ",
        nl=False,
        fg="yellow",
    )
    echo("")
    for doc_link in (
        "- https://docs.microsoft.com/en-us/cli/azure/authenticate-azure-cli",
        "- https://docs.microsoft.com/en-us/cli/azure/azure-cli-configuration",
    ):
        echo(doc_link, fg="cyan")
    ctx.abort()
def verify_gcp_credentials(ctx):
    """
    Ask the user to confirm that GCP access credentials are configured on
    this computer; print a pointer to the GCP docs and abort otherwise.

    Parameters
    ----------
    ctx : click context
        Its `abort()` method is called when the user answers "no".
    """
    credentials_ready = click.confirm(
        "\nMetaflow relies on "
        + yellow("GCP access credentials")
        + " present on your computer to access resources on GCP."
        "\nBefore proceeding further, please confirm that you "
        "have already configured these access credentials on "
        "this computer.",
        default=True,
    )
    if credentials_ready:
        return
    echo(
        "There are many ways to setup your GCP access credentials. You "
        "can get started by getting familiar with the following: ",
        nl=False,
        fg="yellow",
    )
    echo("")
    echo(
        "- https://cloud.google.com/docs/authentication/provide-credentials-adc",
        fg="cyan",
    )
    ctx.abort()
@configure.command(help="Configure metaflow to access Microsoft Azure.")
@click.option(
    "--profile",
    "-p",
    default="",
    help="Configure a named profile. Activate the profile by setting "
    "`METAFLOW_PROFILE` environment variable.",
)
@click.pass_context
def azure(ctx, profile):
    # Walks the user through the Azure datastore / metadata prompts and
    # persists every non-empty answer under `profile`.
    echo(
        "Welcome to Metaflow! Follow the prompts to configure your installation.\n",
        bold=True,
    )
    # Stop right away if the user declines to overwrite an existing config.
    overwrite_ok = confirm_overwrite_config(profile)
    if not overwrite_ok:
        ctx.abort()
    verify_azure_credentials(ctx)
    previous_env = get_env(profile)
    env = dict(configure_azure_datastore_and_metadata(previous_env))
    # Only values that are truthy get written out.
    non_empty = {key: value for key, value in env.items() if value}
    persist_env(non_empty, profile)
    # The Kubernetes hint is only relevant when Azure is the datastore.
    if env.get("METAFLOW_DEFAULT_DATASTORE") != "azure":
        return
    click.echo(
        "\nFinal note! Metaflow can scale your flows by "
        + yellow("executing your steps on Kubernetes.")
        + "\nYou may use Azure Kubernetes Service (AKS)"
        " or a self-managed Kubernetes cluster on Azure VMs."
        + " If/when your Kubernetes cluster is ready for use,"
        " please run 'metaflow configure kubernetes'.",
    )
@configure.command(help="Configure metaflow to access Google Cloud Platform.")
@click.option(
    "--profile",
    "-p",
    default="",
    help="Configure a named profile. Activate the profile by setting "
    "`METAFLOW_PROFILE` environment variable.",
)
@click.pass_context
def gcp(ctx, profile):
    # Walks the user through the Google Cloud Storage datastore / metadata
    # prompts and persists every non-empty answer under `profile`.
    echo(
        "Welcome to Metaflow! Follow the prompts to configure your installation.\n",
        bold=True,
    )
    # Stop right away if the user declines to overwrite an existing config.
    overwrite_ok = confirm_overwrite_config(profile)
    if not overwrite_ok:
        ctx.abort()
    verify_gcp_credentials(ctx)
    previous_env = get_env(profile)
    env = dict(configure_gs_datastore_and_metadata(previous_env))
    # Only values that are truthy get written out.
    non_empty = {key: value for key, value in env.items() if value}
    persist_env(non_empty, profile)
    # The Kubernetes hint is only relevant when GCS is the datastore.
    if env.get("METAFLOW_DEFAULT_DATASTORE") != "gs":
        return
    click.echo(
        "\nFinal note! Metaflow can scale your flows by "
        + yellow("executing your steps on Kubernetes.")
        + "\nYou may use Google Kubernetes Engine (GKE)"
        " or a self-managed Kubernetes cluster on Google Compute Engine VMs."
        + " If/when your Kubernetes cluster is ready for use,"
        " please run 'metaflow configure kubernetes'.",
    )
@configure.command(help="Configure metaflow to access self-managed AWS resources.")
@click.option(
    "--profile",
    "-p",
    default="",
    help="Configure a named profile. Activate the profile by setting "
    "`METAFLOW_PROFILE` environment variable.",
)
@click.pass_context
def aws(ctx, profile):
    # Walks the user through the S3 datastore / metadata prompts, optionally
    # the AWS Batch prompts, and persists every non-empty answer under
    # `profile`.
    echo(
        "Welcome to Metaflow! Follow the prompts to configure your " "installation.\n",
        bold=True,
    )
    # Stop right away if the user declines to overwrite an existing config.
    overwrite_ok = confirm_overwrite_config(profile)
    if not overwrite_ok:
        ctx.abort()
    verify_aws_credentials(ctx)
    previous_env = get_env(profile)
    empty_profile = not previous_env
    env = dict(configure_aws_datastore_and_metadata(previous_env))
    # AWS Batch is only offered when S3 was selected as the datastore.
    if env.get("METAFLOW_DEFAULT_DATASTORE") == "s3":
        batch_question = (
            "\nMetaflow can scale your flows by "
            + yellow("executing your steps on AWS Batch")
            + ".\nAWS Batch is a strict requirement if you intend "
            "to schedule your flows on AWS Step Functions.\nWould "
            "you like to configure AWS Batch as your compute "
            "backend?"
        )
        batch_default = empty_profile or "METAFLOW_BATCH_JOB_QUEUE" in previous_env
        if click.confirm(batch_question, default=batch_default, abort=False):
            env.update(configure_aws_batch(previous_env))
    # Only values that are truthy get written out.
    non_empty = {key: value for key, value in env.items() if value}
    persist_env(non_empty, profile)
@configure.command(help="Configure metaflow to use Kubernetes.")
@click.option(
    "--profile",
    "-p",
    default="",
    help="Configure a named profile. Activate the profile by setting "
    "`METAFLOW_PROFILE` environment variable.",
)
@click.pass_context
def kubernetes(ctx, profile):
    # Verifies the Kubernetes client and kube-config, then collects the
    # metadata service (unless already configured), Kubernetes and optional
    # Argo Events settings on top of the existing profile and persists every
    # non-empty value.
    check_kubernetes_client(ctx)
    # Greet the user!
    echo(
        "Welcome to Metaflow! Follow the prompts to configure your " "installation.\n",
        bold=True,
    )
    check_kubernetes_config(ctx)
    # Check for existing configuration.
    if not confirm_overwrite_config(profile):
        ctx.abort()
    existing_env = get_env(profile)
    # Start from the saved profile so unrelated settings are preserved.
    env = existing_env.copy()
    # We used to push user straight to S3 configuration inline.
    # Now that we support >1 cloud, it gets too complicated.
    # Therefore, we instruct the user to configure datastore first, by
    # a separate command.
    if existing_env.get("METAFLOW_DEFAULT_DATASTORE") == "local":
        click.echo(
            "\nCannot run Kubernetes with local datastore. Please run"
            " 'metaflow configure aws' or 'metaflow configure azure'."
        )
        # The original built `click.Abort()` without raising it, so the
        # command kept going; actually abort here.
        ctx.abort()
    # Configure remote metadata, unless the service is already configured.
    if existing_env.get("METAFLOW_DEFAULT_METADATA") != "service":
        if click.confirm(
            "\nMetaflow can use a "
            + yellow("remote Metadata Service to track")
            + " and persist flow execution metadata. \nWould you like to "
            "configure the Metadata Service?",
            default=True,
            abort=False,
        ):
            env.update(configure_metadata_service(existing_env))
    # Configure Kubernetes for compute.
    env.update(configure_kubernetes(existing_env))
    # Configure Argo Workflows Events
    if click.confirm("\nConfigure support for Argo Workflow Events?"):
        env.update(configure_argo_events(existing_env))
    persist_env({k: v for k, v in env.items() if v}, profile)
================================================
FILE: metaflow/cmd/develop/__init__.py
================================================
from typing import Any
from metaflow.cli import echo_dev_null, echo_always
from metaflow._vendor import click
class CommandObj:
    """Plain attribute holder; the `develop` group stores its state on it."""

    def __init__(self):
        # Nothing to initialise: attributes are attached by the caller.
        return None
@click.group()
@click.pass_context
def cli(ctx):
    # Root click group; it performs no work of its own. Intentionally no
    # docstring: the group has no explicit help text, so click would show a
    # docstring as help.
    return None
@cli.group(help="Metaflow develop commands")
@click.option(
    "--quiet/--no-quiet",
    show_default=True,
    default=False,
    help="Suppress unnecessary messages",
)
@click.pass_context
def develop(
    ctx: Any,
    quiet: bool,
):
    # Builds the shared state object for the `develop` sub-commands and
    # stores it on the click context as `ctx.obj`.
    state = CommandObj()
    state.quiet = quiet
    # `echo` is silenced in quiet mode; `echo_always` is always available.
    state.echo = echo_dev_null if quiet else echo_always
    state.echo_always = echo_always
    ctx.obj = state
from . import stubs
================================================
FILE: metaflow/cmd/develop/stub_generator.py
================================================
import functools
import importlib
import inspect
import math
import os
import pathlib
import re
import time
import typing
from datetime import datetime
from io import StringIO
from types import ModuleType
from typing import (
Any,
Callable,
Dict,
ForwardRef,
Iterable,
List,
NewType,
Optional,
Set,
Tuple,
TypeVar,
Union,
cast,
)
from metaflow import FlowSpec, step
from metaflow.debug import debug
from metaflow.decorators import Decorator, FlowDecorator
from metaflow.extension_support import get_aliased_modules
from metaflow.metaflow_current import Current
from metaflow.metaflow_version import get_version
from metaflow.runner.deployer import DeployedFlow, Deployer, TriggeredRun
from metaflow.runner.deployer_impl import DeployerImpl
# Indentation unit used when emitting stubs.
# NOTE(review): this source appears to have had whitespace collapsed; confirm
# whether the intended value is a single space or a wider indent.
TAB = " "
# Fully qualified names of the modules that define `current` and the Deployer.
METAFLOW_CURRENT_MODULE_NAME = "metaflow.metaflow_current"
METAFLOW_DEPLOYER_MODULE_NAME = "metaflow.runner.deployer"

# Section headers recognised inside docstrings (title line + dashed underline).
param_section_header = re.compile(r"Parameters\s*\n----------\s*\n", flags=re.M)
return_section_header = re.compile(r"Returns\s*\n-------\s*\n", flags=re.M)
add_to_current_header = re.compile(
    r"MF Add To Current\s*\n-----------------\s*\n", flags=re.M
)
# A line that starts with a non-whitespace character.
non_indented_line = re.compile(r"^\S+.*$")
# "<name>" or "<name> : <type>"; consumers read groups "name" and "type".
# The group names were missing from the pattern, which made it uncompilable.
param_name_type = re.compile(r"^(?P<name>\S+)(?:\s*:\s*(?P<type>.*))?$")
# Splits a type description into its "type", an "optional" marker and a
# "default" value; consumers read exactly these three group names.
type_annotations = re.compile(
    r"(?P<type>.*?)(?P<optional>, optional|\(optional\))?(?:, [Dd]efault(?: is | = |: |s to |)\s*(?P<default>.*))?$"
)
# Type variable bound to FlowSpec.
FlowSpecDerived = TypeVar("FlowSpecDerived", bound=FlowSpec)
# Distinct type built on bool.
StepFlag = NewType("StepFlag", bool)
# Either of two callable shapes returning None:
# (FlowSpecDerived, StepFlag) or (FlowSpecDerived, Any, StepFlag).
MetaflowStepFunction = Union[
    Callable[[FlowSpecDerived, StepFlag], None],
    Callable[[FlowSpecDerived, Any, StepFlag], None],
]
# Minimal stand-in exposing start() and end() the way a Match object does, so
# that the docstring-section parsing code can treat both uniformly.
class StartEnd:
    """Pair of offsets readable through `start()` and `end()`."""

    def __init__(self, start: int, end: int):
        self._start, self._end = start, end

    def start(self):
        """Return the stored start offset."""
        return self._start

    def end(self):
        """Return the stored end offset."""
        return self._end
def type_var_to_str(t: TypeVar) -> str:
    """
    Render a TypeVar as the source text of a `typing.TypeVar(...)` call.

    Parameters
    ----------
    t : TypeVar
        The type variable to render.

    Returns
    -------
    str
        A `typing.TypeVar(...)` expression carrying the name, any
        constraints, the bound (as a quoted name) and the variance flags.
    """
    bound = t.__bound__
    bound_name = None
    if bound is not None:
        if isinstance(bound, typing.ForwardRef):
            bound_name = bound.__forward_arg__
        else:
            bound_name = bound.__name__
    # Constraints are positional arguments of TypeVar, so they must come
    # right after the name; the original appended them after the keyword
    # arguments, which is not valid Python.
    constraints = "".join(", %s" % c.__name__ for c in t.__constraints__)
    return 'typing.TypeVar("%s"%s, %scontravariant=%s, covariant=%s)' % (
        t.__name__,
        constraints,
        'bound="%s", ' % bound_name if bound else "",
        t.__contravariant__,
        t.__covariant__,
    )
def new_type_to_str(t: typing.NewType) -> str:
    """
    Render a NewType as the source text of a `typing.NewType(...)` call,
    using the NewType's name and the name of its supertype.
    """
    own_name = t.__name__
    base_name = t.__supertype__.__name__
    return 'typing.NewType("{}", {})'.format(own_name, base_name)
def descend_object(object: str, options: Iterable[str]):
    """
    Decide whether `object` lies under one of the prefixes in `options`.

    Returns True if:
      - options contains a (non-empty) prefix of object
      - the component after the prefix does not start with _

    Parameters
    ----------
    object : str
        Dotted name to test.
    options : Iterable[str]
        Candidate prefixes.

    Returns
    -------
    bool
        True when a prefix matches and the remainder is empty, is only the
        separator, or its first component does not start with "_".
    """
    for opt in options:
        # An empty option strips nothing; the original treated that the same
        # as "no prefix" and moved on.
        if not opt or not object.startswith(opt):
            continue
        remainder = object[len(opt) :]
        # Index 1 skips the inevitable "." separator. The length guard avoids
        # the IndexError the original raised when only one character was left.
        if len(remainder) < 2 or remainder[1] != "_":
            return True
    return False
def parse_params_from_doc(doc: str) -> Tuple[List[inspect.Parameter], bool]:
    """
    Build keyword-only `inspect.Parameter` objects from a parameters section.

    Every non-indented line matching `param_name_type` is treated as a
    parameter declaration; its type text is further split by
    `type_annotations` into a type, an "optional" marker and a default.

    Parameters
    ----------
    doc : str
        Text of the parameters section.

    Returns
    -------
    Tuple[List[inspect.Parameter], bool]
        The parsed parameters, and a flag that is True only when every parsed
        parameter had a default value in the text.
    """
    parameters = []
    no_arg_version = True
    for line in doc.splitlines():
        if non_indented_line.match(line):
            match = param_name_type.match(line)
            arg_name = type_name = is_optional = default = None
            default_set = False
            if match is not None:
                arg_name = match.group("name")
                type_name = match.group("type")
                if type_name is not None:
                    type_detail = type_annotations.match(type_name)
                    if type_detail is not None:
                        type_name = type_detail.group("type")
                        is_optional = type_detail.group("optional") is not None
                        default = type_detail.group("default")
                        if default:
                            default_set = True
                            # NOTE(review): eval() runs on docstring text; this
                            # is only acceptable while docstrings are trusted.
                            # If evaluation fails the raw text is kept.
                            try:
                                default = eval(default)
                            except Exception:
                                pass
                        # Same best-effort evaluation for the type expression.
                        try:
                            type_name = eval(type_name)
                        except Exception:
                            pass
                parameters.append(
                    inspect.Parameter(
                        name=arg_name,
                        kind=inspect.Parameter.KEYWORD_ONLY,
                        default=(
                            default
                            if default_set
                            else None if is_optional else inspect.Parameter.empty
                        ),
                        annotation=(Optional[type_name] if is_optional else type_name),
                    )
                )
                if not default_set:
                    # If we don't have a default set for any parameter, we can't
                    # have a no-arg version since the function would be incomplete
                    no_arg_version = False
    return parameters, no_arg_version
def split_docs(
    raw_doc: str, boundaries: List[Tuple[str, Union[StartEnd, re.Match]]]
) -> Dict[str, str]:
    """
    Cut `raw_doc` into named sections.

    `boundaries` is sorted in place by each marker's `start()`. Each section
    name is mapped to the text running from the end of the previous marker
    (offset 0 for the first section) up to the start of the next marker; the
    last section runs to the end of `raw_doc`.

    Parameters
    ----------
    raw_doc : str
        The full text to split.
    boundaries : List[Tuple[str, Union[StartEnd, re.Match]]]
        (section name, marker) pairs; must not be empty.

    Returns
    -------
    Dict[str, str]
        Section name to section text.
    """
    boundaries.sort(key=lambda entry: entry[1].start())
    sections = {}
    cursor = 0
    for (section_name, _), (_, next_marker) in zip(boundaries, boundaries[1:]):
        sections[section_name] = raw_doc[cursor : next_marker.start()]
        cursor = next_marker.end()
    last_name = boundaries[-1][0]
    sections[last_name] = raw_doc[cursor:]
    return sections
def parse_add_to_docs(
    raw_doc: str,
) -> Dict[str, Union[Tuple[inspect.Signature, str], str]]:
    """
    Parse an "add to current" documentation section.

    The text is a sequence of stanzas. A stanza header is either
    "name -> type", followed by more-indented documentation lines, or a
    "#dotted.reference" line.

    Parameters
    ----------
    raw_doc : str
        Text of the section.

    Returns
    -------
    Dict[str, Union[Tuple[inspect.Signature, str], str]]
        For "name -> type" stanzas: name mapped to a (signature taking only
        `self` with `type` as return annotation, joined doc lines) tuple.
        For "#" lines: the last dotted component mapped to the line without
        its leading "#".

    Raises
    ------
    ValueError
        If a header line is neither a "#" reference nor of the form
        "name -> type".
    """
    prop = None
    return_type = None
    property_indent = None
    doc = []
    add_to_docs = dict()  # type: Dict[str, Union[str, Tuple[inspect.Signature, str]]]

    def _add():
        # Record the stanza collected so far (no-op before the first one).
        if prop:
            add_to_docs[prop] = (
                inspect.Signature(
                    [
                        inspect.Parameter(
                            "self", inspect.Parameter.POSITIONAL_OR_KEYWORD
                        )
                    ],
                    return_annotation=return_type,
                ),
                "\n".join(doc),
            )

    for line in raw_doc.splitlines():
        # Parse stanzas that look like the following:
        # name -> type
        #     indented doc string
        if property_indent is not None and (
            line.startswith(property_indent + " ") or line.strip() == ""
        ):
            # Continuation (or blank) line of the current stanza.
            offset = len(property_indent)
            if line.lstrip().startswith("@@ "):
                line = line.replace("@@ ", "")
            doc.append(line[offset:].rstrip())
        else:
            # Blank lines before the first stanza carry no information. The
            # original compared the stripped string with the integer 0, which
            # is never true, so such a line crashed in the split below.
            if not line.strip():
                continue
            if prop:
                # Ends a property stanza
                _add()
            # Now start a new one
            line = line.rstrip()
            property_indent = line[: len(line) - len(line.lstrip())]
            # Either this has a -> to denote a property or it is a pure name
            # to denote a reference to a function (starting with #)
            line = line.lstrip()
            if line.startswith("#"):
                # The name of the function is the last part like metaflow.deployer.run
                add_to_docs[line.split(".")[-1]] = line[1:]
                continue
            # This is a line so we split it using "->"
            prop, return_type = line.split("->")
            prop = prop.strip()
            return_type = return_type.strip()
            doc = []
    _add()
    return add_to_docs
def add_indent(indentation: str, text: str) -> str:
    """
    Prefix every line of `text` with `indentation`.

    Lines are obtained with `str.splitlines()` and re-joined with "\\n", so a
    trailing newline in `text` is not preserved.
    """
    prefixed_lines = (indentation + row for row in text.splitlines())
    return "\n".join(prefixed_lines)
class StubGenerator:
"""
This class takes the name of a library as input and a directory as output.
It will then generate the corresponding stub files for each defined type
(generic variables, functions, classes, etc.) at run time.
This means that the code for the library is not statically parsed, but it is
executed and then the types are dynamically created and analyzed to produce the stub
files.
The only items analyzes are those that belong to the library (ie: anything in
the library or below it but not any external imports)
"""
def __init__(self, output_dir: str, include_generated_for: bool = True):
    """
    Initializes the StubGenerator.

    :param output_dir: stored as the generator's output directory
    :type output_dir: str
    :param include_generated_for: stored as the `_write_generated_for` flag
    :type include_generated_for: bool
    """
    # Let metaflow know we are in stubgen mode. This is sometimes useful to skip
    # some processing like loading libraries, etc. It is used in Metaflow extensions
    # so do not remove even if you do not see a use for it directly in the code.
    os.environ["METAFLOW_STUBGEN"] = "1"

    self._write_generated_for = include_generated_for

    # Each entry is (alias the module should be installed under, actual
    # module name).
    self._pending_modules = [
        ("metaflow", "metaflow")
    ]  # type: List[Tuple[str, str]]
    self._root_module = "metaflow."
    self._safe_modules = ["metaflow.", "metaflow_extensions."]
    aliased = [
        (self._get_module_name_alias(mod), mod) for mod in get_aliased_modules()
    ]
    self._pending_modules.extend(aliased)

    # We exclude some modules to not create a bunch of random non-user facing
    # .pyi files.
    # NOTE(review): "metaflow.procpoll.py" looks like a file name rather than
    # a module name -- confirm whether "metaflow.procpoll" was intended.
    self._exclude_modules = {
        "metaflow.cli_args",
        "metaflow.cmd",
        "metaflow.cmd_with_io",
        "metaflow.datastore",
        "metaflow.debug",
        "metaflow.decorators",
        "metaflow.event_logger",
        "metaflow.extension_support",
        "metaflow.graph",
        "metaflow.integrations",
        "metaflow.lint",
        "metaflow.metaflow_metadata",
        "metaflow.metaflow_config_funcs",
        "metaflow.metaflow_environment",
        "metaflow.metaflow_profile",
        "metaflow.metaflow_version",
        "metaflow.mflog",
        "metaflow.monitor",
        "metaflow.package",
        "metaflow.plugins.datastores",
        "metaflow.plugins.env_escape",
        "metaflow.plugins.metadata_providers",
        "metaflow.procpoll.py",
        "metaflow.R",
        "metaflow.runtime",
        "metaflow.sidecar",
        "metaflow.task",
        "metaflow.tracing",
        "metaflow.unbounded_foreach",
        "metaflow.util",
        "metaflow._vendor",
    }

    self._done_modules = set()  # type: Set[str]
    self._output_dir = output_dir
    self._mf_version = get_version()

    # Contains the names of the methods that are injected in Deployer
    self._deployer_injected_methods = (
        dict()
    )  # type: Dict[str, Dict[str, Union[Tuple[str, str], str]]]
    # Contains information to add to the Current object (injected by decorators)
    self._addl_current = (
        {}
    )  # type: Dict[str, Dict[str, Tuple[inspect.Signature, str]]]

    self._reset()
def _reset(self):
# "Globals" that are used throughout processing. This is not the cleanest
# but simplifies code quite a bit.
# Imports that are needed at the top of the file
self._imports = set() # type: Set[str]
self._sub_module_imports = set() # type: Set[Tuple[str, str]]``
# Typing imports (behind if TYPE_CHECKING) that are needed at the top of the file
self._typing_imports = set() # type: Set[str]
# Typevars that are defined
self._typevars = dict() # type: Dict[str, Union[TypeVar, type]]
# Current objects in the file being processed
self._current_objects = {} # type: Dict[str, Any]
self._current_references = [] # type: List[str]
# Current stubs in the file being processed
self._stubs = [] # type: List[str]
# These have a shorter "scope"
# Current parent module of the object being processed -- used to determine
# the "globals()"
self._current_parent_module = None # type: Optional[ModuleType]
def _get_module_name_alias(self, module_name):
if any(
module_name.startswith(x) for x in self._safe_modules
) and not module_name.startswith(self._root_module):
return self._root_module + ".".join(
["mf_extensions", *module_name.split(".")[1:]]
)
return module_name
def _get_relative_import(
self, new_module_name, cur_module_name, is_init_module=False
):
new_components = new_module_name.split(".")
cur_components = cur_module_name.split(".")
init_module_count = 1 if is_init_module else 0
common_idx = 0
max_idx = min(len(new_components), len(cur_components))
while (
common_idx < max_idx
and new_components[common_idx] == cur_components[common_idx]
):
common_idx += 1
# current: a.b and parent: a.b.e.d -> from .e.d import
# current: a.b.c.d and parent: a.b.e.f -> from ...e.f import
return "." * (len(cur_components) - common_idx + init_module_count) + ".".join(
new_components[common_idx:]
)
def _get_module(self, alias, name):
    """
    Import module `name` and sort its members into objects to stub, references
    to import from elsewhere and further modules to process.

    Parameters
    ----------
    alias : str
        Name recorded as the current module name (see `_get_module_name_alias`).
    name : str
        Importable name of the module.

    Side effects: sets `self._current_module` and `self._current_module_name`
    and appends to `self._pending_modules`, `self._current_references` and
    `self._current_objects`.
    """
    debug.stubgen_exec("Analyzing module %s (aliased at %s)..." % (name, alias))
    self._current_module = importlib.import_module(name)
    self._current_module_name = alias
    # Passed to _get_relative_import as `is_init_module`
    is_package = hasattr(self._current_module, "__path__")

    for objname, obj in self._current_module.__dict__.items():
        if objname == "_addl_stubgen_modules":
            debug.stubgen_exec(
                "Adding modules %s from _addl_stubgen_modules" % str(obj)
            )
            self._pending_modules.extend(
                (self._get_module_name_alias(m), m) for m in obj
            )
            continue
        if objname.startswith("_"):
            debug.stubgen_exec(
                "Skipping object because it starts with _ %s" % objname
            )
            continue
        if inspect.ismodule(obj):
            # Only consider modules that are safe modules
            if (
                any(obj.__name__.startswith(m) for m in self._safe_modules)
                and obj.__name__ not in self._exclude_modules
            ):
                debug.stubgen_exec(
                    "Adding child module %s to process" % obj.__name__
                )
                new_module_alias = self._get_module_name_alias(obj.__name__)
                self._pending_modules.append((new_module_alias, obj.__name__))
                new_parent, new_name = new_module_alias.rsplit(".", 1)
                self._current_references.append(
                    "from %s import %s as %s"
                    % (
                        self._get_relative_import(new_parent, alias, is_package),
                        new_name,
                        objname,
                    )
                )
            else:
                debug.stubgen_exec("Skipping child module %s" % obj.__name__)
        else:
            parent_module = inspect.getmodule(obj)
            # For objects we include:
            #  - stuff that is a functools.partial (these are all the decorators;
            #    we could be more specific but good enough for now) for root module.
            #    We also include the step decorator (it's from metaflow.decorators
            #    which is typically excluded)
            #  - Stuff that is defined in this module itself
            #  - a reference to anything in the modules we will process later
            #    (so we don't duplicate a ton of times)
            if (
                parent_module is None
                or (
                    name + "." == self._root_module
                    and (
                        (parent_module.__name__.startswith("functools"))
                        or obj == step
                    )
                )
                or parent_module.__name__ == name
            ):
                debug.stubgen_exec("Adding object %s to process" % objname)
                self._current_objects[objname] = obj
            elif not any(
                parent_module.__name__.startswith(p) for p in self._exclude_modules
            ) and any(
                parent_module.__name__.startswith(p) for p in self._safe_modules
            ):
                parent_alias = self._get_module_name_alias(parent_module.__name__)
                relative_import = self._get_relative_import(
                    parent_alias, alias, is_package
                )
                debug.stubgen_exec(
                    "Adding reference %s and adding module %s as %s"
                    % (objname, parent_module.__name__, parent_alias)
                )
                obj_import_name = getattr(obj, "__name__", objname)
                # A __name__ that is not a valid identifier (empty, "<lambda>",
                # ...) cannot appear in a `from ... import` statement; use the
                # name the object is bound to in this module instead.
                if not (
                    isinstance(obj_import_name, str)
                    and obj_import_name.isidentifier()
                ):
                    obj_import_name = objname
                self._current_references.append(
                    "from %s import %s as %s"
                    % (relative_import, obj_import_name, objname)
                )
                self._pending_modules.append((parent_alias, parent_module.__name__))
            else:
                debug.stubgen_exec("Skipping object %s" % objname)
def _get_element_name_with_module(
self, element: Union[TypeVar, type, Any], force_import=False
) -> str:
# The element can be a string, for example "def f() -> 'SameClass':..."
def _add_to_import(name):
if name != self._current_module_name:
self._imports.add(name)
def _add_to_typing_check(name, is_module=False):
if name == "None":
return
if is_module:
self._typing_imports.add(name)
else:
splits = name.rsplit(".", 1)
if len(splits) > 1 and not (
len(splits) == 2 and splits[0] == self._current_module_name
):
# We don't add things that are just one name -- probably things within
# the current file
self._typing_imports.add(splits[0])
def _format_qualified_class_name(cls: type) -> str:
"""Helper to format a class with its qualified module name"""
# Special case for NoneType - return None
if cls.__name__ == "NoneType":
return "None"
module = inspect.getmodule(cls)
if (
module
and module.__name__ != "builtins"
and module.__name__ != "__main__"
):
module_name = self._get_module_name_alias(module.__name__)
_add_to_typing_check(module_name, is_module=True)
return f"{module_name}.{cls.__name__}"
else:
return cls.__name__
if isinstance(element, str):
# Special case for self referential things (particularly in a class)
if element == self._current_name:
return '"%s"' % element
# We first try to eval the annotation because with the annotations future
# it is always a string
try:
potential_element = eval(
element,
(
self._current_parent_module.__dict__
if self._current_parent_module
else None
),
)
if potential_element:
element = potential_element
except:
pass
if isinstance(element, str):
# If we are in our "safe" modules, make sure we alias properly
if any(element.startswith(x) for x in self._safe_modules):
element = self._get_module_name_alias(element)
_add_to_typing_check(element)
return '"%s"' % element
# 3.10+ has NewType as a class but not before so hack around to check for NewType
elif isinstance(element, TypeVar) or hasattr(element, "__supertype__"):
if not element.__name__ in self._typevars:
self._typevars[element.__name__] = element
return element.__name__
elif isinstance(element, type):
module = inspect.getmodule(element)
if (
module is None
or module.__name__ == "builtins"
or module.__name__ == "__main__"
):
# Special case for "NoneType" -- return None as NoneType is only 3.10+
if element.__name__ == "NoneType":
return "None"
return element.__name__
module_name = self._get_module_name_alias(module.__name__)
if force_import:
_add_to_import(module_name.split(".")[0])
_add_to_typing_check(module_name, is_module=True)
if module_name != self._current_module_name:
return "{0}.{1}".format(module_name, element.__name__)
else:
return element.__name__
elif isinstance(element, type(Ellipsis)):
return "..."
elif isinstance(element, typing._GenericAlias):
# We need to check things recursively in __args__ if it exists
args_str = []
for arg in getattr(element, "__args__", []):
# Special handling for class objects in type arguments
if isinstance(arg, type):
args_str.append(_format_qualified_class_name(arg))
else:
args_str.append(self._get_element_name_with_module(arg))
_add_to_import("typing")
if element._name:
if element._name == "Optional":
# We don't want to include NoneType in the string -- it breaks things
args_str = args_str[:1]
elif element._name == "Callable":
# We need to make this a list of everything except the end one
# except if it is an ellipsis
if args_str[0] != "...":
call_args = "[" + ", ".join(args_str[:-1]) + "]"
args_str = [call_args, args_str[-1]]
elif element._name == "Tuple" and not args_str:
# Tuple[()] means an empty tuple; Tuple[] is invalid syntax
return "typing.Tuple[()]"
return "typing.%s[%s]" % (element._name, ", ".join(args_str))
else:
# Handle the case where we have a generic type without a _name
origin = element.__origin__
if isinstance(origin, type):
origin_str = _format_qualified_class_name(origin)
else:
origin_str = str(origin)
return "%s[%s]" % (origin_str, ", ".join(args_str))
elif isinstance(element, ForwardRef):
f_arg = self._get_module_name_alias(element.__forward_arg__)
_add_to_typing_check(f_arg)
return '"%s"' % f_arg
elif inspect.getmodule(element) == inspect.getmodule(typing):
_add_to_import("typing")
# Special handling for NamedTuple which is a function
if hasattr(element, "__name__") and element.__name__ == "NamedTuple":
return "typing.NamedTuple"
return str(element)
else:
if hasattr(element, "__module__"):
elem_module = self._get_module_name_alias(element.__module__)
if elem_module == "builtins":
return getattr(element, "__name__", str(element))
_add_to_typing_check(elem_module, is_module=True)
return "{0}.{1}".format(
elem_module, getattr(element, "__name__", element)
)
else:
# A constant
return str(element)
def _exploit_annotation(self, annotation: Any, starting: str = ": ") -> str:
annotation_string = ""
if annotation and annotation != inspect.Parameter.empty:
annotation_string += starting + self._get_element_name_with_module(
annotation
)
return annotation_string
def _generate_class_stub(self, name: str, clazz: type) -> str:
    """
    Generate the stub text for class `clazz`.

    The stub contains the class prototype (bases and metaclass), its docstring
    and a stub for every public or dunder function and every property found in
    the class' own `__dict__`. `Deployer`, `DeployedFlow` and `Current` get
    special treatment because some of their members are injected at runtime.

    Parameters
    ----------
    name : str
        Name under which the class is exposed in the current module.
    clazz : type
        The class to generate a stub for.

    Returns
    -------
    str
        The text of the class stub.
    """
    debug.stubgen_exec("Generating class stub for %s" % name)

    def _split_func_doc(raw_doc):
        # Clean up a docstring (a missing one is treated as empty) and split it
        # into the "func_doc", "param_doc" and "return_doc" sections
        func_doc = inspect.cleandoc(raw_doc or "")
        return split_docs(
            func_doc,
            [
                ("func_doc", StartEnd(0, 0)),
                (
                    "param_doc",
                    param_section_header.search(func_doc)
                    or StartEnd(len(func_doc), len(func_doc)),
                ),
                (
                    "return_doc",
                    return_section_header.search(func_doc)
                    or StartEnd(len(func_doc), len(func_doc)),
                ),
            ],
        )

    skip_init = issubclass(clazz, (TriggeredRun, DeployedFlow))
    if issubclass(clazz, DeployerImpl):
        if clazz.TYPE is not None:
            clazz_type = clazz.TYPE.replace("-", "_")
            self._deployer_injected_methods.setdefault(clazz_type, {})[
                "deployer"
            ] = (self._current_module_name + "." + name)

    # Handle TypedDict gracefully for Python 3.7 compatibility
    # _TypedDictMeta is not available in Python 3.7
    typed_dict_meta = getattr(typing, "_TypedDictMeta", None)
    if typed_dict_meta is not None and isinstance(clazz, typed_dict_meta):
        self._sub_module_imports.add(("typing", "TypedDict"))
        total_flag = getattr(clazz, "__total__", False)
        buff = StringIO()
        # Emit the TypedDict base and total flag
        buff.write(f"class {name}(TypedDict, total={total_flag}):\n")
        # Write out each field from __annotations__
        fields = clazz.__annotations__
        for field_name, field_type in fields.items():
            ann = self._get_element_name_with_module(field_type)
            buff.write(f"{TAB}{field_name}: {ann}\n")
        if not fields:
            # A class statement needs a body to be valid syntax
            buff.write(f"{TAB}...\n")
        return buff.getvalue()

    buff = StringIO()
    # Class prototype
    buff.write("class " + name.split(".")[-1] + "(")

    # Add super classes
    for c in clazz.__bases__:
        name_with_module = self._get_element_name_with_module(c, force_import=True)
        buff.write(name_with_module + ", ")

    # Add metaclass
    name_with_module = self._get_element_name_with_module(
        clazz.__class__, force_import=True
    )
    buff.write("metaclass=" + name_with_module + "):\n")

    # Add class docstring
    if clazz.__doc__:
        buff.write('%s"""\n' % TAB)
        my_doc = inspect.cleandoc(clazz.__doc__)
        init_blank = True
        for line in my_doc.split("\n"):
            if init_blank and len(line.strip()) == 0:
                continue
            init_blank = False
            buff.write("%s%s\n" % (TAB, line.rstrip()))
        buff.write('%s"""\n' % TAB)

    # For NamedTuple, we have __annotations__ but no __init__. In that case,
    # we are going to "create" a __init__ function with the annotations
    # to show what the class takes.
    annotation_dict = None
    init_func = None
    for key, element in clazz.__dict__.items():
        func_deco = None
        if isinstance(element, staticmethod):
            func_deco = "@staticmethod"
            element = element.__func__
        elif isinstance(element, classmethod):
            func_deco = "@classmethod"
            element = element.__func__
        if key == "__init__":
            if skip_init:
                continue
            init_func = element
        elif key == "__annotations__":
            annotation_dict = element
        if inspect.isfunction(element):
            if not element.__name__.startswith("_") or element.__name__.startswith(
                "__"
            ):
                if (
                    clazz == Deployer
                    and element.__name__ in self._deployer_injected_methods
                ):
                    # This is a method that was injected. It has docs but we need
                    # to parse it to generate the proper signature
                    docs = _split_func_doc(element.__doc__)
                    parameters, _ = parse_params_from_doc(docs["param_doc"])
                    return_type = self._deployer_injected_methods[element.__name__][
                        "deployer"
                    ]

                    buff.write(
                        self._generate_function_stub(
                            key,
                            element,
                            sign=[
                                inspect.Signature(
                                    parameters=[
                                        inspect.Parameter(
                                            "self",
                                            inspect.Parameter.POSITIONAL_OR_KEYWORD,
                                        )
                                    ]
                                    + parameters,
                                    return_annotation=return_type,
                                )
                            ],
                            indentation=TAB,
                            deco=func_deco,
                        )
                    )
                elif (
                    clazz == DeployedFlow and element.__name__ == "from_deployment"
                ):
                    # We simply update the signature to list the return
                    # type as a union of all possible deployers
                    docs = _split_func_doc(element.__doc__)
                    parameters, _ = parse_params_from_doc(docs["param_doc"])

                    def _create_multi_type(*l):
                        return typing.Union[l]

                    all_types = [
                        v["from_deployment"][0]
                        for v in self._deployer_injected_methods.values()
                    ]

                    if len(all_types) > 1:
                        return_type = _create_multi_type(*all_types)
                    else:
                        return_type = all_types[0] if len(all_types) else None

                    buff.write(
                        self._generate_function_stub(
                            key,
                            element,
                            sign=[
                                inspect.Signature(
                                    parameters=[
                                        inspect.Parameter(
                                            "cls",
                                            inspect.Parameter.POSITIONAL_OR_KEYWORD,
                                        )
                                    ]
                                    + parameters,
                                    return_annotation=return_type,
                                )
                            ],
                            indentation=TAB,
                            doc=docs["func_doc"]
                            + "\n\nParameters\n----------\n"
                            + docs["param_doc"]
                            + "\n\nReturns\n-------\n"
                            + "%s\nA `DeployedFlow` object" % str(return_type),
                            deco=func_deco,
                        )
                    )
                elif (
                    clazz == DeployedFlow
                    and element.__name__.startswith("from_")
                    and element.__name__[5:] in self._deployer_injected_methods
                ):
                    # Get the doc from the from_deployment method stored in
                    # self._deployer_injected_methods
                    docs = _split_func_doc(
                        self._deployer_injected_methods[element.__name__[5:]][
                            "from_deployment"
                        ][1]
                    )
                    parameters, _ = parse_params_from_doc(docs["param_doc"])
                    return_type = self._deployer_injected_methods[
                        element.__name__[5:]
                    ]["from_deployment"][0]

                    buff.write(
                        self._generate_function_stub(
                            key,
                            element,
                            sign=[
                                inspect.Signature(
                                    parameters=[
                                        inspect.Parameter(
                                            "cls",
                                            inspect.Parameter.POSITIONAL_OR_KEYWORD,
                                        )
                                    ]
                                    + parameters,
                                    return_annotation=return_type,
                                )
                            ],
                            indentation=TAB,
                            doc=docs["func_doc"]
                            + "\n\nParameters\n----------\n"
                            + docs["param_doc"]
                            + "\n\nReturns\n-------\n"
                            + docs["return_doc"],
                            deco=func_deco,
                        )
                    )
                else:
                    if (
                        issubclass(clazz, DeployedFlow)
                        and clazz.TYPE is not None
                        and key == "from_deployment"
                    ):
                        clazz_type = clazz.TYPE.replace("-", "_")
                        # Record docstring for this function
                        self._deployer_injected_methods.setdefault(clazz_type, {})[
                            "from_deployment"
                        ] = (
                            self._current_module_name + "." + name,
                            element.__doc__,
                        )
                    buff.write(
                        self._generate_function_stub(
                            key,
                            element,
                            indentation=TAB,
                            deco=func_deco,
                        )
                    )
        elif isinstance(element, property):
            if element.fget:
                buff.write(
                    self._generate_function_stub(
                        key, element.fget, indentation=TAB, deco="@property"
                    )
                )
            if element.fset:
                buff.write(
                    self._generate_function_stub(
                        key, element.fset, indentation=TAB, deco="@%s.setter" % key
                    )
                )

    # Special handling of classes that have injected methods
    if clazz == Current:
        # Multiple decorators can add the same object (trigger and trigger_on_finish)
        # as examples so we sort it out.
        resulting_dict = (
            dict()
        )  # type: Dict[str, List[Union[inspect.Signature, str, List[str]]]]
        for deco_name, addl_current in self._addl_current.items():
            for attr_name, (sign, doc) in addl_current.items():
                r = resulting_dict.setdefault(attr_name, [sign, doc, []])
                r[2].append("@%s" % deco_name)
        for attr_name, (sign, doc, decos) in resulting_dict.items():
            buff.write(
                self._generate_function_stub(
                    attr_name,
                    sign=[sign],
                    indentation=TAB,
                    doc="(only in the presence of the %s decorator%s)\n\n"
                    % (", or ".join(decos), "" if len(decos) == 1 else "s")
                    + doc,
                    deco="@property",
                )
            )

    if not skip_init and init_func is None and annotation_dict:
        buff.write(
            self._generate_function_stub(
                "__init__",
                func=None,
                sign=[
                    inspect.Signature(
                        parameters=[
                            inspect.Parameter(
                                name="self",
                                kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
                            )
                        ]
                        + [
                            inspect.Parameter(
                                name=name,
                                kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
                                annotation=annotation,
                            )
                            for name, annotation in annotation_dict.items()
                        ]
                    )
                ],
                indentation=TAB,
            )
        )
    buff.write("%s...\n" % TAB)

    return buff.getvalue()
def _extract_signature_from_decorator(
    self, name: str, raw_doc: Optional[str], is_flow_decorator: bool = False
) -> Optional[List[Tuple[inspect.Signature, str]]]:
    """
    Build the overloaded signatures of a decorator from its docstring.

    Parameters
    ----------
    name : str
        Name of the decorator; used as the key in `self._addl_current` when
        the docstring has a section matched by `add_to_current_header`.
    raw_doc : Optional[str]
        Docstring of the decorator.
    is_flow_decorator : bool, default False
        Selects the flow-level flavor of the signatures instead of the
        step-level one.

    Returns
    -------
    Optional[List[Tuple[inspect.Signature, str]]]
        None if there is no docstring; otherwise (signature, docstring) pairs
        where only the first and last pairs carry the docstring.
    """
    # TODO: This only handles the `Parameters` section for now; we are
    # using it only to parse the documentation for step/flow decorators so
    # this is enough for now but it could be extended more.
    # Inspired from:
    # https://github.com/rr-/docstring_parser/blob/master/docstring_parser/numpydoc.py
    if raw_doc is None:
        return None

    if "FlowSpecDerived" not in self._typevars:
        self._typevars["FlowSpecDerived"] = FlowSpecDerived
        self._typevars["StepFlag"] = StepFlag

    cleaned_doc = inspect.cleandoc(raw_doc)
    docs = split_docs(
        cleaned_doc,
        [
            ("func_doc", StartEnd(0, 0)),
            (
                "param_doc",
                param_section_header.search(cleaned_doc)
                or StartEnd(len(cleaned_doc), len(cleaned_doc)),
            ),
            (
                "add_to_current_doc",
                add_to_current_header.search(cleaned_doc)
                or StartEnd(len(cleaned_doc), len(cleaned_doc)),
            ),
        ],
    )
    parameters, no_arg_version = parse_params_from_doc(docs["param_doc"])

    if docs["add_to_current_doc"]:
        self._addl_current[name] = parse_add_to_docs(docs["add_to_current_doc"])

    def _wrapped(annotation, default=inspect.Parameter.empty):
        # The positional `f` parameter receiving the decorated object
        return inspect.Parameter(
            name="f",
            kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
            annotation=annotation,
            default=default,
        )

    def _overload(params, return_annotation):
        # A signature paired with an (initially) empty docstring
        return (
            inspect.Signature(
                parameters=params, return_annotation=return_annotation
            ),
            "",
        )

    # Type returned when the decorator is called with its arguments only
    if is_flow_decorator:
        decorator_type = Callable[
            [typing.Type[FlowSpecDerived]],
            typing.Type[FlowSpecDerived],
        ]
    else:
        decorator_type = typing.Callable[
            [MetaflowStepFunction], MetaflowStepFunction
        ]

    overloads = []
    if no_arg_version:
        if docs["param_doc"]:
            overloads.append(_overload(parameters, decorator_type))
        if is_flow_decorator:
            flow_type = typing.Type[FlowSpecDerived]
            overloads.append(_overload([_wrapped(flow_type)], flow_type))
            optional_target = Optional[typing.Type[FlowSpecDerived]]
        else:
            for step_type in (
                Callable[[FlowSpecDerived, StepFlag], None],
                Callable[[FlowSpecDerived, Any, StepFlag], None],
            ):
                overloads.append(_overload([_wrapped(step_type)], step_type))
            optional_target = Optional[MetaflowStepFunction]
        # Catch-all variant: `f` is optional and followed by the parameters
        overloads.append(
            _overload(
                [_wrapped(optional_target, default=None)] + parameters,
                inspect.Signature.empty,
            )
        )
    else:
        overloads.append(_overload([] + parameters, decorator_type))

    if len(overloads) == 2:
        # If we only have one overload -- we don't need it at all. Happens for
        # flow-level decorators that don't take any arguments
        overloads = overloads[1:]

    # Add doc to first and last overloads. Jedi uses the last one and pycharm
    # the first one. Go figure.
    result_docstring = docs["func_doc"]
    if docs["param_doc"]:
        result_docstring += "\nParameters\n----------\n" + docs["param_doc"]
    overloads[0] = (overloads[0][0], result_docstring)
    overloads[-1] = (overloads[-1][0], result_docstring)
    return overloads
def _generate_function_stub(
self,
name: str,
func: Optional[Union[Callable, classmethod]] = None,
sign: Optional[List[inspect.Signature]] = None,
indentation: Optional[str] = None,
doc: Optional[str] = None,
deco: Optional[str] = None,
) -> str:
debug.stubgen_exec("Generating function stub for %s" % name)
def exploit_default(default_value: Any) -> Optional[str]:
if default_value == inspect.Parameter.empty:
return None
if type(default_value).__module__ == "builtins":
if isinstance(default_value, list):
return (
"["
+ ", ".join(
[cast(str, exploit_default(v)) for v in default_value]
)
+ "]"
)
elif isinstance(default_value, tuple):
return (
"("
+ ", ".join(
[cast(str, exploit_default(v)) for v in default_value]
)
+ ")"
)
elif isinstance(default_value, dict):
return (
"{"
+ ", ".join(
[
cast(str, exploit_default(k))
+ ": "
+ cast(str, exploit_default(v))
for k, v in default_value.items()
]
)
+ "}"
)
elif isinstance(default_value, str):
return repr(default_value) # Use repr() for proper escaping
elif isinstance(default_value, (int, float, bool)):
return str(default_value)
elif default_value is None:
return "None"
else:
return "..." # For other built-in types not explicitly handled
elif inspect.isclass(default_value) or inspect.isfunction(default_value):
if default_value.__module__ == "builtins":
return default_value.__name__
else:
self._typing_imports.add(default_value.__module__)
return ".".join([default_value.__module__, default_value.__name__])
else:
return "..." # For complex objects like class instances
buff = StringIO()
if sign is None and func is None:
raise RuntimeError(
"Cannot generate stub for function %s with either a function or signature"
% name
)
try:
sign = sign or [inspect.signature(cast(Callable, func))]
except ValueError:
# In 3.7, NamedTuples have properties that then give an operator.itemgetter
# which doesn't have a signature. We ignore for now. It doesn't have much
# value
return ""
doc = doc or func.__doc__
if doc == "STUBGEN_IGNORE":
# Ignore methods that have STUBGEN_IGNORE. Used to ignore certain
# methods for the Deployer
return ""
indentation = indentation or ""
# Deal with overload annotations -- the last one will be non overloaded and
# will be the one that shows up as the type hint (for Jedi and PyCharm which
# don't handle overloads as well)
do_overload = False
if sign and len(sign) > 1:
do_overload = True
for count, my_sign in enumerate(sign):
if count > 0:
buff.write("\n")
if do_overload and count < len(sign) - 1:
# According to mypy, we should have this on all variants but
# some IDEs seem to prefer if there is one non-overloaded
# This also changes our checks so if changing, modify tests
buff.write(indentation + "@typing.overload\n")
if deco:
buff.write(indentation + deco + "\n")
buff.write(indentation + "def " + name + "(")
kw_only_param = False
has_var_args = False
for i, (par_name, parameter) in enumerate(my_sign.parameters.items()):
annotation = self._exploit_annotation(parameter.annotation)
default = exploit_default(parameter.default)
if (
kw_only_param
and not has_var_args
and parameter.kind != inspect.Parameter.KEYWORD_ONLY
):
raise RuntimeError(
"In function '%s': cannot have a positional parameter after a "
"keyword only parameter" % name
)
if (
parameter.kind == inspect.Parameter.KEYWORD_ONLY
and not kw_only_param
and not has_var_args
):
kw_only_param = True
buff.write("*, ")
if parameter.kind == inspect.Parameter.VAR_KEYWORD:
par_name = "**%s" % par_name
elif parameter.kind == inspect.Parameter.VAR_POSITIONAL:
has_var_args = True
par_name = "*%s" % par_name
if default:
buff.write(par_name + annotation + " = " + default)
else:
buff.write(par_name + annotation)
if i < len(my_sign.parameters) - 1:
buff.write(", ")
ret_annotation = self._exploit_annotation(
my_sign.return_annotation, starting=" -> "
)
buff.write(")" + ret_annotation + ":\n")
if (count == 0 or count == len(sign) - 1) and doc is not None:
buff.write('%s%s"""\n' % (indentation, TAB))
my_doc = inspect.cleandoc(doc)
init_blank = True
for line in my_doc.split("\n"):
if init_blank and len(line.strip()) == 0:
continue
init_blank = False
buff.write("%s%s%s\n" % (indentation, TAB, line.rstrip()))
buff.write('%s%s"""\n' % (indentation, TAB))
buff.write("%s%s...\n" % (indentation, TAB))
return buff.getvalue()
def _generate_generic_stub(self, element_name: str, element: Any) -> str:
return "{0}: {1}\n".format(
element_name, self._get_element_name_with_module(type(element))
)
def _generate_stubs(self):
    """
    Generate a stub for every object collected in `self._current_objects` and
    append it to `self._stubs`.

    Classes, functions and `functools.partial` objects each have a dedicated
    generator; anything else that is not a module gets a generic
    `name: type` stub.
    """
    for name, attr in self._current_objects.items():
        self._current_parent_module = inspect.getmodule(attr)
        self._current_name = name

        if inspect.isclass(attr):
            self._stubs.append(self._generate_class_stub(name, attr))
            continue

        if inspect.isfunction(attr):
            # Special handling of the `step` function where we want to add an
            # overload. This is just a single case so we don't make it general.
            # Unfortunately, when iterating, it doesn't see the @overload
            is_root_step = (
                name == "step"
                and self._current_module_name == self._root_module[:-1]
            )
            if not is_root_step:
                self._stubs.append(self._generate_function_stub(name, attr))
                continue
            linear_step = inspect.Signature(
                parameters=[
                    inspect.Parameter(
                        name="f",
                        kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
                        annotation=Callable[[FlowSpecDerived], None],
                    )
                ],
                return_annotation=Callable[[FlowSpecDerived, StepFlag], None],
            )
            step_with_input = inspect.Signature(
                parameters=[
                    inspect.Parameter(
                        name="f",
                        kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
                        annotation=Callable[[FlowSpecDerived, Any], None],
                    )
                ],
                return_annotation=Callable[[FlowSpecDerived, Any, StepFlag], None],
            )
            self._stubs.append(
                self._generate_function_stub(
                    name,
                    func=attr,
                    sign=[linear_step, step_with_input, inspect.signature(attr)],
                )
            )
            continue

        if isinstance(attr, functools.partial):
            wrapped = attr.args[0]
            if not issubclass(wrapped, Decorator):
                self._stubs.append(
                    self._generate_function_stub(
                        name, attr.func, doc=wrapped.__doc__
                    )
                )
                continue
            # Special case where we are going to extract the parameters from
            # the docstring to make the decorator look nicer
            res = self._extract_signature_from_decorator(
                name,
                wrapped.__doc__,
                is_flow_decorator=issubclass(wrapped, FlowDecorator),
            )
            if res:
                self._stubs.append(
                    self._generate_function_stub(
                        name,
                        func=attr.func,
                        sign=[r[0] for r in res],
                        doc=res[-1][1],
                    )
                )
            # else:
            #     print(
            #         "WARNING: Could not extract decorator signature for %s"
            #         % name
            #     )
            continue

        if not inspect.ismodule(attr):
            self._stubs.append(self._generate_generic_stub(name, attr))
def _write_header(self, f, width):
title_line = "Auto-generated Metaflow stub file"
title_white_space = (width - len(title_line)) / 2
title_line = "#%s%s%s#\n" % (
" " * math.floor(title_white_space),
title_line,
" " * math.ceil(title_white_space),
)
f.write(
"#" * (width + 2)
+ "\n"
+ title_line
+ "# MF version: %s%s#\n"
% (self._mf_version, " " * (width - 13 - len(self._mf_version)))
+ "# Generated on %s%s#\n"
% (
datetime.fromtimestamp(time.time()).isoformat(),
" " * (width - 14 - 26),
)
+ "#" * (width + 2)
+ "\n\n"
)
def write_out(self):
    """
    Generate and write the stub files for all pending modules.

    Modules are taken from `self._pending_modules`; processing a module may
    queue more modules. The "current" and "deployer" modules, as well as the
    root `metaflow` module, are (re)generated at the end since they depend on
    information gathered while processing the other modules.

    Import lines are written in sorted order so that the generated files do
    not vary from one run to the next.
    """
    out_dir = self._output_dir
    os.makedirs(out_dir, exist_ok=True)
    # Write out py.typed (pylance seems to require it even though it is not
    # required in PEP 561) as well as a file we will use to check the "version"
    # of the stubs -- this helps to inform the user if the stubs were generated
    # for another version of Metaflow.
    pathlib.Path(os.path.join(out_dir, "py.typed")).touch()
    if self._write_generated_for:
        pathlib.Path(os.path.join(out_dir, "generated_for.txt")).write_text(
            "%s %s"
            % (self._mf_version, datetime.fromtimestamp(time.time()).isoformat())
        )
    post_process_modules = []
    is_post_processing = False
    while len(self._pending_modules) != 0 or len(post_process_modules) != 0:
        if is_post_processing or len(self._pending_modules) == 0:
            is_post_processing = True
            module_alias, module_name = post_process_modules.pop(0)
        else:
            module_alias, module_name = self._pending_modules.pop(0)
        # Skip vendored stuff
        if module_alias.startswith("metaflow._vendor") or module_name.startswith(
            "metaflow._vendor"
        ):
            continue
        # We delay current module and deployer module to the end since they
        # depend on info we gather elsewhere
        if (
            module_alias
            in (
                METAFLOW_CURRENT_MODULE_NAME,
                METAFLOW_DEPLOYER_MODULE_NAME,
            )
            and len(self._pending_modules) != 0
        ):
            post_process_modules.append((module_alias, module_name))
            continue
        if module_alias in self._done_modules:
            continue
        self._done_modules.add(module_alias)
        # If not, we process the module
        self._reset()
        self._get_module(module_alias, module_name)
        if module_name == "metaflow" and not is_post_processing:
            # We will want to regenerate this at the end to take into account
            # any changes to the Deployer
            post_process_modules.append((module_name, module_name))
            self._done_modules.remove(module_name)
            continue
        self._generate_stubs()

        if hasattr(self._current_module, "__path__"):
            # This is a package (so a directory) and we are dealing with
            # a __init__.pyi type of case
            dir_path = os.path.join(self._output_dir, *module_alias.split(".")[1:])
        else:
            # This is NOT a package so the original source file is not a __init__.py
            dir_path = os.path.join(
                self._output_dir, *module_alias.split(".")[1:-1]
            )
        # The stub is named after the source file with an "i" appended to its
        # name (.py -> .pyi)
        out_file = os.path.join(
            dir_path, os.path.basename(self._current_module.__file__) + "i"
        )

        width = 100

        os.makedirs(os.path.dirname(out_file), exist_ok=True)
        # We want to make sure we always have a __init__.pyi in the directories
        # we are creating
        parts = dir_path.split(os.sep)[len(self._output_dir.split(os.sep)) :]
        for i in range(1, len(parts) + 1):
            init_file_path = os.path.join(
                self._output_dir, *parts[:i], "__init__.pyi"
            )
            if not os.path.exists(init_file_path):
                with open(init_file_path, mode="w", encoding="utf-8") as f:
                    self._write_header(f, width)

        with open(out_file, mode="w", encoding="utf-8") as f:
            self._write_header(f, width)

            f.write("from __future__ import annotations\n\n")
            imported_typing = False
            # The imports are kept in sets: sort them for a reproducible output
            for module in sorted(self._imports):
                f.write("import " + module + "\n")
                if module == "typing":
                    imported_typing = True
            for module, sub_module in sorted(self._sub_module_imports):
                f.write(f"from {module} import {sub_module}\n")
            if self._typing_imports:
                if not imported_typing:
                    f.write("import typing\n")
                    imported_typing = True
                f.write("if typing.TYPE_CHECKING:\n")
                for module in sorted(self._typing_imports):
                    f.write(TAB + "import " + module + "\n")
            if self._typevars:
                if not imported_typing:
                    f.write("import typing\n")
                    imported_typing = True
                for type_name, type_var in self._typevars.items():
                    if isinstance(type_var, TypeVar):
                        f.write(
                            "%s = %s\n" % (type_name, type_var_to_str(type_var))
                        )
                    else:
                        f.write(
                            "%s = %s\n" % (type_name, new_type_to_str(type_var))
                        )
            f.write("\n")
            for import_line in self._current_references:
                f.write(import_line + "\n")
            f.write("\n")
            for stub in self._stubs:
                f.write(stub + "\n")
        if is_post_processing:
            # Don't consider any pending modules if we are post processing
            self._pending_modules.clear()
# When executed as a script: generate the stubs into the ./stubs directory
if __name__ == "__main__":
    gen = StubGenerator("./stubs")
    gen.write_out()
================================================
FILE: metaflow/cmd/develop/stubs.py
================================================
import importlib
import os
import subprocess
import sys
import tempfile
from typing import Any, List, Optional, Tuple
from metaflow._vendor import click
from . import develop
from .stub_generator import StubGenerator
_py_ver = sys.version_info[:2]
if _py_ver >= (3, 8):
from importlib import metadata
elif _py_ver >= (3, 7):
from metaflow._vendor.v3_7 import importlib_metadata as metadata
else:
from metaflow._vendor.v3_6 import importlib_metadata as metadata
# Command group registered under `develop`; it has no behavior of its own and
# only hosts the sub-commands declared with `@stubs.command`. The docstring
# below is the group's help text.
@develop.group(short_help="Stubs management")
@click.pass_context
def stubs(ctx: Any):
    """
    Stubs provide type hints and documentation hints to IDEs and are typically provided
    inline with the code where a static analyzer can pick them up. In Metaflow's case,
    however, proper stubs rely on dynamic behavior (ie: the decorators are
    generated at runtime). This makes it necessary to have separate stub files.
    This CLI provides utilities to check and generate stubs for your current Metaflow
    installation.
    """
@stubs.command(short_help="Check validity of stubs")
@click.pass_context
def check(ctx: Any):
    """
    Checks the currently installed stubs (if they exist) and validates that they
    match the currently installed version of Metaflow.
    """
    dist_packages, paths = get_packages_for_stubs()
    provider_count = len(dist_packages) + len(paths)

    # Nothing provides the stubs
    if provider_count == 0:
        return print_status(ctx, "no package provides `metaflow-stubs`", False)

    # Exactly one provider: report the outcome of its validation
    if provider_count == 1:
        if dist_packages:
            only_package = dist_packages[0]
            return print_status(
                ctx, *internal_check(only_package[1], only_package[0])
            )
        return print_status(ctx, *internal_check(paths[0]))

    # Several providers: this is an error and we list all of them
    pkg_names = ""
    if dist_packages:
        pkg_names = " packages " + ", ".join([p[0] for p in dist_packages])
    pkg_paths = ""
    if paths:
        pkg_paths = "directories at " + ", ".join(paths)
    conjunction = "and " if pkg_names and pkg_paths else ""
    return print_status(
        ctx,
        "metaflow-stubs is provided multiple times by%s %s%s"
        % (pkg_names, conjunction, pkg_paths),
        False,
    )
@stubs.command(short_help="Remove all packages providing metaflow stubs")
@click.pass_context
def remove(ctx: Any):
    """
    Removes all packages that provide metaflow-stubs from the current Python environment.
    """
    dist_packages, paths = get_packages_for_stubs()
    if not dist_packages and not paths:
        if ctx.obj.quiet:
            ctx.obj.echo_always("not_installed")
        else:
            ctx.obj.echo("No packages provide `metaflow-stubs`")
        # Nothing to uninstall. Returning here matters: `pip uninstall` exits
        # with an error when it is given no requirement, so falling through
        # would turn a no-op into a CalledProcessError.
        return

    # Stubs coming from a plain directory are not owned by any distribution,
    # so pip cannot remove them; ask the user to do it.
    if paths:
        raise RuntimeError(
            "Cannot remove stubs when metaflow-stubs is already provided by a directory. "
            "Please remove the following and try again: %s" % ", ".join(paths)
        )

    pkgs_to_remove = [p[0] for p in dist_packages]
    ctx.obj.echo(
        "Uninstalling existing packages providing metaflow-stubs: %s"
        % ", ".join(pkgs_to_remove)
    )

    # In quiet mode pip's own output is discarded so only the status token
    # below is printed.
    pip_output = subprocess.DEVNULL if ctx.obj.quiet else None
    subprocess.check_call(
        [
            sys.executable,
            "-m",
            "pip",
            "uninstall",
            "-y",
            *pkgs_to_remove,
        ],
        stderr=pip_output,
        stdout=pip_output,
    )
    if ctx.obj.quiet:
        ctx.obj.echo_always("ok")
    else:
        ctx.obj.echo("All packages providing metaflow-stubs have been removed.")
@stubs.command(short_help="Generate Python stubs")
@click.pass_context
@click.option(
    "--force/--no-force",
    default=False,
    show_default=True,
    help="Force installation of stubs even if they exist and are valid",
)
def install(ctx: Any, force: bool):
    """
    Generates the Python stubs for Metaflow considering the installed version of
    Metaflow. The stubs will be generated if they do not exist or do not match the
    current version of Metaflow and installed in the Python environment.
    """
    # `build` is imported only to fail early with a readable message; the
    # package is actually used further down through `python -m build`.
    try:
        import build
    except ImportError:
        raise RuntimeError(
            "Installing stubs requires 'build' -- please install it and try again"
        )

    dist_packages, paths = get_packages_for_stubs()
    # Stubs provided by a bare directory cannot be replaced through pip.
    if paths:
        raise RuntimeError(
            "Cannot install stubs when metaflow-stubs is already provided by a directory. "
            "Please remove the following and try again: %s" % ", ".join(paths)
        )

    # Short-circuit when a single, valid stubs package is already installed
    # and the user did not ask for a forced reinstall.
    if len(dist_packages) == 1:
        if internal_check(dist_packages[0][1])[1] == True and not force:
            if ctx.obj.quiet:
                ctx.obj.echo_always("already_installed")
            else:
                ctx.obj.echo(
                    "Metaflow stubs are already installed and valid -- use --force to reinstall"
                )
            return
    # Only the base version is used below; the extension part is discarded.
    mf_version, _ = get_mf_version(True)
    # Everything is built inside a throw-away directory: a minimal setup.py and
    # MANIFEST.in are written, the stubs are generated next to them, a wheel is
    # built, and that wheel is installed from the local `dist` folder.
    with tempfile.TemporaryDirectory() as tmp_dir:
        with open(os.path.join(tmp_dir, "setup.py"), "w") as f:
            f.write(
                f"""
from setuptools import setup, find_namespace_packages
setup(
include_package_data=True,
name="metaflow-stubs",
version="{mf_version}",
description="Metaflow: More Data Science, Less Engineering",
author="Metaflow Developers",
author_email="help@metaflow.org",
license="Apache Software License",
packages=find_namespace_packages(),
package_data={{"metaflow-stubs": ["generated_for.txt", "py.typed", "**/*.pyi"]}},
install_requires=["metaflow=={mf_version}"],
python_requires=">=3.6.1",
)
"""
            )
        with open(os.path.join(tmp_dir, "MANIFEST.in"), "w") as f:
            f.write(
                """
include metaflow-stubs/generated_for.txt
include metaflow-stubs/py.typed
global-include *.pyi
"""
            )
        StubGenerator(os.path.join(tmp_dir, "metaflow-stubs")).write_out()
        # Build the wheel; output is silenced in quiet mode.
        subprocess.check_call(
            [sys.executable, "-m", "build", "--wheel"],
            cwd=tmp_dir,
            stderr=subprocess.DEVNULL if ctx.obj.quiet else None,
            stdout=subprocess.DEVNULL if ctx.obj.quiet else None,
        )
        if dist_packages:
            # We need to uninstall all the other packages first
            pkgs_to_remove = [p[0] for p in dist_packages]
            ctx.obj.echo(
                "Uninstalling existing packages providing metaflow-stubs: %s"
                % ", ".join(pkgs_to_remove)
            )
            subprocess.check_call(
                [
                    sys.executable,
                    "-m",
                    "pip",
                    "uninstall",
                    "-y",
                    *pkgs_to_remove,
                ],
                cwd=tmp_dir,
                stderr=subprocess.DEVNULL if ctx.obj.quiet else None,
                stdout=subprocess.DEVNULL if ctx.obj.quiet else None,
            )
        # `--no-index --find-links <dist>` restricts pip to the wheel that was
        # just built; `--no-deps` keeps it from touching the metaflow install.
        subprocess.check_call(
            [
                sys.executable,
                "-m",
                "pip",
                "install",
                "--force-reinstall",
                "--no-deps",
                "--no-index",
                "--find-links",
                os.path.join(tmp_dir, "dist"),
                "metaflow-stubs",
            ],
            cwd=tmp_dir,
            stderr=subprocess.DEVNULL if ctx.obj.quiet else None,
            stdout=subprocess.DEVNULL if ctx.obj.quiet else None,
        )
    if ctx.obj.quiet:
        ctx.obj.echo_always("installed")
    else:
        ctx.obj.echo("Metaflow stubs successfully installed")
def split_version(vers: str) -> Tuple[str, Optional[str]]:
    """
    Split a version string on its first "+".

    Returns a `(base, extension)` pair; `extension` is None when the string
    contains no "+" at all (and the empty string when it ends with "+").
    """
    base, plus, extension = vers.partition("+")
    if not plus:
        return base, None
    return base, extension
def get_mf_version(public: bool = False) -> Tuple[str, Optional[str]]:
    """
    Return the Metaflow version reported by `get_version(public)`, split into
    a `(base, extension)` pair by `split_version`.
    """
    # Imported lazily, inside the function, as in the rest of this module.
    from metaflow.metaflow_version import get_version

    full_version = get_version(public)
    return split_version(full_version)
def get_stubs_version(stubs_root_path: Optional[str]) -> Tuple[str, Optional[str]]:
    """
    Read the version a stubs package was generated for.

    The version is the first space-separated token of `generated_for.txt`
    inside `stubs_root_path`, split by `split_version`. `(None, None)` is
    returned when no path is given or the marker file is missing.
    """
    if stubs_root_path is None:
        # The stubs are NOT an integrated part of metaflow
        return None, None

    marker_file = os.path.join(stubs_root_path, "generated_for.txt")
    if not os.path.isfile(marker_file):
        return None, None

    with open(marker_file, "r", encoding="utf-8") as f:
        first_token = f.read().strip().split(" ", 1)[0]
        return split_version(first_token)
def internal_check(stubs_path: str, pkg_name: Optional[str] = None) -> Tuple[str, bool]:
    """
    Compare the version recorded in the stubs at `stubs_path` with the
    installed Metaflow version.

    Returns a `(message, valid)` pair. `pkg_name`, when given, is used in the
    success message instead of the stubs path.
    """
    installed = get_mf_version()
    generated_for = get_stubs_version(stubs_path)

    if generated_for == (None, None):
        return "the installed stubs package does not seem valid", False

    if generated_for == installed:
        where = pkg_name if pkg_name else "installed at '%s'" % stubs_path
        return (
            "the stubs package %s matches your current Metaflow version" % where,
            True,
        )

    def _extensions_suffix(extensions):
        # Only mention extensions when the version actually carries some.
        return " and extensions %s" % extensions if extensions else ""

    return (
        "the stubs package was generated for Metaflow version %s%s "
        "but you have Metaflow version %s%s installed."
        % (
            generated_for[0],
            _extensions_suffix(generated_for[1]),
            installed[0],
            _extensions_suffix(installed[1]),
        ),
        False,
    )
def get_packages_for_stubs() -> Tuple[List[Tuple[str, str]], List[str]]:
    """
    Gets the packages that provide metaflow-stubs.

    This returns two lists:
      - the first list contains tuples of package names and root path for the package
      - the second list contains all non package names (ie: things in path for example)

    Returns
    -------
    Tuple[List[Tuple[str, str]], List[str]]
        Packages or paths providing metaflow-stubs
    """
    try:
        m = importlib.import_module("metaflow-stubs")
        all_paths = set(m.__path__)
    except Exception:
        # The module cannot be imported (or has no `__path__`): nothing
        # provides the stubs. Only `Exception` is caught so that Ctrl-C and
        # SystemExit are not silently turned into "no stubs installed".
        return [], []

    # We check the type because if the user has multiple importlib metadata, for
    # some reason it shows up multiple times.
    interesting_dists = [
        d
        for d in metadata.distributions()
        if isinstance(d, metadata.PathDistribution)
        and "metaflow-stubs" in (d.read_text("top_level.txt") or "").split()
    ]

    dist_list = []
    for dist in interesting_dists:
        # This is a package we care about
        root_path = dist.locate_file("metaflow-stubs").as_posix()
        dist_list.append((dist.metadata["Name"], root_path))
        # Whatever is left in `all_paths` is not owned by any distribution.
        all_paths.discard(root_path)
    return dist_list, list(all_paths)
def print_status(ctx: click.Context, msg: str, valid: bool):
    """
    Report the outcome of a stubs check through `ctx.obj`.

    In quiet mode only the bare token "valid"/"invalid" is emitted with
    `echo_always`; otherwise a colored sentence followed by `msg` is echoed.
    """
    if ctx.obj.quiet:
        ctx.obj.echo_always("valid" if valid else "invalid")
        return

    verdict, color = ("valid", "green") if valid else ("invalid", "red")
    ctx.obj.echo("Metaflow stubs are ", nl=False)
    ctx.obj.echo(verdict, fg=color, nl=False)
    ctx.obj.echo(": " + msg)
    return
================================================
FILE: metaflow/cmd/main_cli.py
================================================
import os
from metaflow._vendor import click
from metaflow.extension_support.cmd import process_cmds, resolve_cmds
from metaflow.plugins.datastores.local_storage import LocalStorage
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, CONTACT_INFO
from metaflow.metaflow_version import get_version
from .util import echo_always
import metaflow.tracing as tracing
# Root click group for the built-in commands of this module (`help` and
# `status` are registered on it below). It is later passed as one of the
# sources of the `start` command collection. No docstring is added on purpose:
# click would display it as help text.
@click.group()
@tracing.cli("cli/main")
def main():
    pass
@main.command(help="Show all available commands.")
@click.pass_context
def help(ctx):
    """Print the help text of the parent context of this command."""
    parent_ctx = ctx.parent
    help_text = parent_ctx.get_help()
    print(help_text)
@main.command(help="Show flows accessible from the current working tree.")
def status():
    """
    Print where flows can be found for the current metadata provider.

    For a "service" provider only the provider location and instructions are
    shown; otherwise the local datastore is located and its flows are listed.

    Raises
    ------
    click.ClickException
        If no metadata provider is configured or no local datastore directory
        is found in the current working tree.
    """
    from metaflow.client import get_metadata

    res = get_metadata()
    if not res:
        raise click.ClickException("Unknown status: cannot find a Metadata provider")

    # Split on the FIRST "@" only: the part after it (e.g. a URL) may itself
    # contain "@", and a value without "@" must not raise an IndexError.
    md_type, _, md_info = res.partition("@")

    if md_type == "service":
        echo("Using Metadata provider at: ", nl=False)
        echo('"%s"\n' % md_info, fg="cyan")
        echo("To list available flows, type:\n")
        echo("1. python")
        echo("2. from metaflow import Metaflow")
        echo("3. list(Metaflow())")
        return

    from metaflow.client import namespace, metadata, Metaflow

    # Get the local data store path
    path = LocalStorage.get_datastore_root_from_config(echo, create_on_absent=False)
    # Throw an exception
    if path is None:
        raise click.ClickException(
            "Could not find "
            + click.style('"%s"' % DATASTORE_LOCAL_DIR, fg="red")
            + " in the current working tree."
        )

    # The local metadata provider is pointed at the parent of the datastore
    # directory, with the namespace filter cleared.
    stripped_path = os.path.dirname(path)
    namespace(None)
    metadata("local@%s" % stripped_path)
    echo("Working tree found at: ", nl=False)
    echo('"%s"\n' % stripped_path, fg="cyan")
    echo("Available flows:", fg="cyan", bold=True)
    for flow in Metaflow():
        echo("* %s" % flow, fg="cyan")
# (command name, relative module path) pairs for the built-in sub-commands.
# NOTE(review): presumably consumed by `process_cmds`/`resolve_cmds` from
# metaflow.extension_support.cmd through the module globals -- confirm there.
CMDS_DESC = [
    ("configure", ".configure_cmd.cli"),
    ("tutorials", ".tutorials_cmd.cli"),
    ("develop", ".develop.cli"),
    ("code", ".code.cli"),
]
# Called with this module's globals so it can see (and possibly update) them.
process_cmds(globals())
# Entry point of the `metaflow` command line: a command collection built from
# `main` plus whatever `resolve_cmds()` returns. It prints a version banner
# and, when no sub-command was given, the contact links and the help text.
# No docstring is added on purpose: click would display it as help text.
@click.command(
    cls=click.CommandCollection,
    sources=[main] + resolve_cmds(),
    invoke_without_command=True,
)
@click.pass_context
def start(ctx):
    # `echo` is a module-level name used by the commands above (e.g. `status`).
    global echo
    echo = echo_always

    import metaflow

    version = get_version()
    echo("Metaflow ", fg="magenta", bold=True, nl=False)

    if ctx.invoked_subcommand is not None:
        echo("(%s)\n" % version, fg="magenta", bold=False)
    else:
        echo("(%s): " % version, fg="magenta", bold=False, nl=False)
        echo("More AI, less engineering\n", fg="magenta")

        # Pad every link to the same width so the descriptions line up.
        # `default=0` keeps an empty CONTACT_INFO from raising ValueError.
        lnk_sz = max((len(lnk) for lnk in CONTACT_INFO.values()), default=0) + 1
        for what, lnk in CONTACT_INFO.items():
            echo("%s%s" % (lnk, " " * (lnk_sz - len(lnk))), fg="cyan", nl=False)
            echo("- %s" % what)
        echo("")

        print(ctx.get_help())


if __name__ == "__main__":
    start()
================================================
FILE: metaflow/cmd/make_wrapper.py
================================================
import sys
import subprocess
from pathlib import Path
import sysconfig
import site
def find_makefile():
    """
    Locate the devtools `Makefile` shipped with Metaflow.

    Candidate `share/metaflow/devtools` directories are checked first; if none
    contains a Makefile, the `direct_url.json` metadata of the installed
    `metaflow` distribution is consulted to find a local source checkout.

    Returns
    -------
    Optional[pathlib.Path]
        Path to the Makefile, or None when it cannot be found.
    """
    possible_dirs = []

    # 1) The standard sysconfig-based location
    data_dir = sysconfig.get_paths()["data"]
    possible_dirs.append(Path(data_dir) / "share" / "metaflow" / "devtools")

    # 2) The user base (e.g. ~/.local on many systems)
    user_base = site.getuserbase()  # e.g. /home/runner/.local
    possible_dirs.append(Path(user_base) / "share" / "metaflow" / "devtools")

    # 3) site-packages can vary, we can guess share/.. near each site-packages
    # (Works if pip actually placed devtools near site-packages.)
    for p in site.getsitepackages():
        possible_dirs.append(Path(p).parent / "share" / "metaflow" / "devtools")
    user_site = site.getusersitepackages()
    possible_dirs.append(Path(user_site).parent / "share" / "metaflow" / "devtools")

    for candidate_dir in possible_dirs:
        makefile_candidate = candidate_dir / "Makefile"
        if makefile_candidate.is_file():
            return makefile_candidate

    # 4) When developing, Metaflow might be installed with --editable, which means
    # the devtools will not be located within site-packages. We read the actual
    # location from package metadata in this case, but only do this heavier
    # operation if the above lookups fail.
    try:
        import json
        from importlib.metadata import Distribution
        from urllib.parse import urlparse
        from urllib.request import url2pathname

        direct_url = Distribution.from_name("metaflow").read_text("direct_url.json")
        if direct_url:
            content = json.loads(direct_url)
            url = content.get("url", "")
            if not url.startswith("file://"):
                return None

            # Convert the file URL to a local path properly: this decodes
            # percent-escapes (e.g. "%20") and handles Windows drive letters,
            # which a plain string replace of "file://" does not.
            source_root = Path(url2pathname(urlparse(url).path))
            makefile_candidate = source_root / "devtools" / "Makefile"
            if makefile_candidate.is_file():
                return makefile_candidate
        else:
            # No dist metadata found. This is tied to the version of pip being used
            # Do not bother with .egg-link installs due to the handling of the file
            # contents being a headache due to lack of a unified spec.
            print(
                "Could not locate an installation of Metaflow. No package metadata found."
            )
            print(
                "If Metaflow is installed as editable, try upgrading the version of pip and reinstalling in order to generate proper package metadata.\n"
            )
    except Exception:
        # Best effort only: any failure while reading metadata means "not found".
        return None

    return None
def main():
    """
    Run `make -f <devtools Makefile>` with the arguments given on the command
    line and exit with make's return code.

    Exits with status 1 when the Makefile cannot be located, when `make`
    itself is not available, or when the user interrupts the run.
    """
    makefile_path = find_makefile()
    if not makefile_path:
        print("ERROR: Could not find executable in any known location.")
        sys.exit(1)

    cmd = ["make", "-f", str(makefile_path)] + sys.argv[1:]

    try:
        completed = subprocess.run(cmd, check=True)
        sys.exit(completed.returncode)
    except subprocess.CalledProcessError as ex:
        # Propagate make's own failure code.
        sys.exit(ex.returncode)
    except FileNotFoundError:
        # `make` is not installed / not on PATH: report it instead of
        # letting a traceback escape.
        print("ERROR: Could not run `make`; please make sure it is installed.")
        sys.exit(1)
    except KeyboardInterrupt:
        print("Process interrupted by user. Exiting cleanly.")
        sys.exit(1)
================================================
FILE: metaflow/cmd/tutorials_cmd.py
================================================
import os
import shutil
from metaflow._vendor import click
from .util import echo_always, makedirs
# All output of this module goes through `echo`, an alias of `echo_always`.
echo = echo_always


# Top-level click group of this module; it only hosts the `tutorials` group.
# No docstring is added on purpose: click would display it as help text.
@click.group()
def cli():
    pass


# Group holding the `list`, `pull` and `info` tutorial commands defined below.
@cli.group(help="Browse and access the metaflow tutorial episodes.")
def tutorials():
    pass
def get_tutorials_dir():
    """
    Return the directory holding the tutorial episodes.

    `<package_dir>/metaflow/tutorials` is used when it exists, where
    `package_dir` is two levels above this file; otherwise
    `<package_dir>/tutorials` is returned without checking that it exists.
    """
    package_dir = os.path.dirname(os.path.dirname(__file__))
    nested_dir = os.path.join(package_dir, "metaflow", "tutorials")
    if os.path.exists(nested_dir):
        return nested_dir
    return os.path.join(package_dir, "tutorials")
def get_tutorial_metadata(tutorial_path):
    """
    Extract the metadata of an episode from its `README.md`.

    The README is split on "#". The first chunk provides the header line
    ("Episode <id>: <title>") and the description (the first `**bold**` span).
    Later chunks starting with "Before playing", "Showcasing" and "To play"
    provide the optional "prereq", "showcase" and "play" entries, with their
    first line and any "```" fences removed.

    Parameters
    ----------
    tutorial_path : str
        Directory of the episode; must contain a `README.md`.

    Returns
    -------
    dict
        Always has "description", "episode" and "title"; may also have
        "prereq", "showcase" and "play".
    """
    metadata = {}
    # Read as UTF-8 explicitly so the result does not depend on the locale's
    # default encoding (which breaks on non-ASCII READMEs on some systems).
    with open(os.path.join(tutorial_path, "README.md"), encoding="utf-8") as readme:
        content = readme.read()

    paragraphs = [paragraph.strip() for paragraph in content.split("#") if paragraph]
    metadata["description"] = paragraphs[0].split("**")[1]
    header = paragraphs[0].split("\n")
    header = header[0].split(":")
    metadata["episode"] = header[0].strip()[len("Episode ") :]
    metadata["title"] = header[1].strip()

    # Section heading prefix -> metadata key.
    sections = (
        ("Before playing", "prereq"),
        ("Showcasing", "showcase"),
        ("To play", "play"),
    )
    for paragraph in paragraphs[1:]:
        for heading, key in sections:
            if paragraph.startswith(heading):
                # Drop the heading line itself and strip code fences.
                lines = "\n".join(paragraph.split("\n")[1:])
                metadata[key] = lines.replace("```", "")
    return metadata
def get_all_episodes():
    """Return the sorted entries of the tutorials directory, minus dotfiles."""
    # Skip hidden files (like .gitignore)
    return [
        entry
        for entry in sorted(os.listdir(get_tutorials_dir()))
        if not entry.startswith(".")
    ]
@tutorials.command(help="List the available episodes.")
def list():
    """Print one line per episode (id and title), then a hint on how to pull."""
    echo("Episodes:", fg="cyan", bold=True)
    for episode_name in get_all_episodes():
        episode_dir = os.path.join(get_tutorials_dir(), episode_name)
        details = get_tutorial_metadata(episode_dir)
        episode_id = details["episode"]
        episode_title = details["title"]
        echo("* {0: <20} ".format(episode_id), fg="cyan", nl=False)
        echo("- {0}".format(episode_title))

    echo("\nTo pull the episodes, type: ")
    echo("metaflow tutorials pull", fg="cyan")
def validate_episode(episode):
    """
    Ensure `episode` names a directory inside the tutorials directory.

    Raises
    ------
    click.BadArgumentUsage
        If no such directory exists.
    """
    src_dir = os.path.join(get_tutorials_dir(), episode)
    if os.path.isdir(src_dir):
        return

    quoted_episode = click.style('"{0}"'.format(episode), fg="red")
    list_hint = click.style("metaflow tutorials list", fg="cyan")
    raise click.BadArgumentUsage(
        "Episode "
        + quoted_episode
        + " does not exist."
        " To see a list of available episodes, "
        "type:\n" + list_hint
    )
def autocomplete_episodes(ctx, args, incomplete):
    """Return the episode names containing `incomplete` as a substring."""
    matches = []
    for name in get_all_episodes():
        if incomplete in name:
            matches.append(name)
    return matches
@tutorials.command(help="Pull episodes " "into your current working directory.")
@click.option(
    "--episode",
    default="",
    help="Optional episode name " "to pull only a single episode.",
)
def pull(episode):
    """
    Copy one episode (or all of them when `episode` is empty) from the
    tutorials directory into `./metaflow-tutorials`.

    Every requested episode is validated before anything is copied. For an
    episode that already exists at the destination, the user is asked whether
    to delete it; declining skips that episode.
    """
    tutorials_dir = get_tutorials_dir()
    episodes = [episode] if episode else get_all_episodes()

    # Validate that the list is valid.
    for name in episodes:
        validate_episode(name)

    # Create destination `metaflow-tutorials` dir.
    dst_parent = os.path.join(os.getcwd(), "metaflow-tutorials")
    makedirs(dst_parent)

    # Pull specified episodes.
    for name in episodes:
        dst_dir = os.path.join(dst_parent, name)

        # Check if episode has already been pulled before.
        if os.path.exists(dst_dir):
            replace_existing = click.confirm(
                "Episode "
                + click.style('"{0}"'.format(name), fg="red")
                + " has already been pulled before. Do you wish "
                "to delete the existing version?"
            )
            if not replace_existing:
                continue
            shutil.rmtree(dst_dir)

        echo("Pulling episode ", nl=False)
        echo('"{0}"'.format(name), fg="cyan", nl=False)
        # TODO: Is the following redundant?
        echo(" into your current working directory.")

        # Copy from (local) metaflow package dir to current.
        shutil.copytree(os.path.join(tutorials_dir, name), dst_dir)

    echo("\nTo know more about an episode, type:\n", nl=False)
    echo("metaflow tutorials info [EPISODE]", fg="cyan")
@tutorials.command(help="Find out more about an episode.")
@click.argument("episode", autocompletion=autocomplete_episodes)
def info(episode):
    """
    Print the synopsis, showcase, optional prerequisites and play
    instructions taken from the README of `episode`.
    """
    validate_episode(episode)
    episode_dir = os.path.join(get_tutorials_dir(), episode)
    details = get_tutorial_metadata(episode_dir)

    echo("Synopsis:", fg="cyan", bold=True)
    echo("%s" % details["description"])

    echo("\nShowcasing:", fg="cyan", bold=True, nl=True)
    echo("%s" % details["showcase"])

    # The prerequisites section is the only optional one.
    if "prereq" in details:
        echo("\nBefore playing:", fg="cyan", bold=True, nl=True)
        echo("%s" % details["prereq"])

    echo("\nTo play:", fg="cyan", bold=True)
    echo("%s" % details["play"])
================================================
FILE: metaflow/cmd/util.py
================================================
import os
from metaflow._vendor import click
def makedirs(path):
    """
    Create `path` and any missing parent directories.

    An already existing `path` is silently accepted; every other OSError is
    propagated to the caller.
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        # FileExistsError is the OSError subclass for errno EEXIST, which
        # replaces the hard-coded `errno == 17` comparison.
        return
def echo_dev_null(*args, **kwargs):
    """Accept any arguments and do nothing with them."""
    return None
def echo_always(line, **kwargs):
    """Print `line` through `click.secho`, forwarding all keyword options."""
    style_options = kwargs
    click.secho(line, **style_options)
================================================
FILE: metaflow/cmd_with_io.py
================================================
import subprocess
from .exception import ExternalCommandFailed
from metaflow.util import to_bytes
def cmd(cmdline, input, output):
    """
    Run a shell command with files prepared beforehand and collected afterwards.

    Parameters
    ----------
    cmdline : str
        Command line, executed through the shell (`shell=True`).
        NOTE(review): must never be built from untrusted input.
    input : dict
        Maps a file path to the data written to it (converted with
        `to_bytes`) before the command runs.
    output : iterable of str
        Paths read back, in binary mode, after the command succeeded.

    Returns
    -------
    bytes or list of bytes
        The single file content when exactly one output path was given,
        otherwise the list of contents in the order of `output`.

    Raises
    ------
    ExternalCommandFailed
        If the command exits with a non-zero status.
    """
    for in_path, payload in input.items():
        with open(in_path, "wb") as sink:
            sink.write(to_bytes(payload))

    exit_status = subprocess.call(cmdline, shell=True)
    if exit_status:
        raise ExternalCommandFailed(
            "Command '%s' returned a non-zero " "exit code." % cmdline
        )

    collected = []
    for out_path in output:
        with open(out_path, "rb") as source:
            collected.append(source.read())

    return collected[0] if len(collected) == 1 else collected
================================================
FILE: metaflow/datastore/__init__.py
================================================
from .inputs import Inputs
from .flow_datastore import FlowDataStore
from .datastore_set import TaskDataStoreSet
from .task_datastore import TaskDataStore
from .spin_datastore import SpinTaskDatastore
================================================
FILE: metaflow/datastore/content_addressed_store.py
================================================
import gzip
from collections import namedtuple
from hashlib import sha1
from io import BytesIO
from ..exception import MetaflowInternalError
from .exceptions import DataException
class ContentAddressedStore(object):
    """
    This class is not meant to be overridden and is meant to be common across
    different datastores.
    """

    # Result of saving one blob: `uri` is the full location (or None) and
    # `key` is the content hash under which the blob can be loaded.
    save_blobs_result = namedtuple("save_blobs_result", "uri key")

    def __init__(self, prefix, storage_impl):
        """
        Initialize a ContentAddressedStore

        A content-addressed store stores data using a name/key that is a hash
        of the content. This means that duplicate content is only stored once.

        Parameters
        ----------
        prefix : string
            Prefix that will be prepended when storing a file
        storage_impl : type
            Implementation for the backing storage implementation to use
        """
        self._prefix = prefix
        self._storage_impl = storage_impl
        self.TYPE = self._storage_impl.TYPE
        self._blob_cache = None

    def set_blob_cache(self, blob_cache):
        """Install the cache consulted (and fed) by `load_blobs`."""
        self._blob_cache = blob_cache

    def save_blobs(self, blob_iter, raw=False, len_hint=0, is_transfer=False):
        """
        Saves blobs of data to the datastore

        The blobs of data are saved as is if raw is True. If raw is False, the
        datastore may process the blobs and they should then only be loaded
        using load_blob

        NOTE: The idea here is that there are two modes to access the file once
        it is saved to the datastore:
          - if raw is True, you would be able to access it directly using the
            URI returned; the bytes that are passed in as 'blob' would be
            returned directly by reading the object at that URI. You would also
            be able to access it using load_blob passing the key returned
          - if raw is False, no URI would be returned (the URI would be None)
            and you would only be able to access the object using load_blob.
          - The API also specifically takes a list to allow for parallel writes
            if available in the datastore. We could also make a single
            'save_blob' API and save_blobs but this seems superfluous

        Parameters
        ----------
        blob_iter : Iterator
            Iterator over bytes objects to save. When is_transfer is True, the
            items are (key, bytes, metadata) tuples instead.
        raw : bool, default False
            Whether to save the bytes directly or process them, by default False
        len_hint : int, default 0
            Hint of the number of blobs that will be produced by the
            iterator, by default 0
        is_transfer : bool, default False
            If True, this indicates we are saving blobs directly from the
            output of another content addressed store's load_blobs.

        Returns
        -------
        List of save_blobs_result:
            The list order is the same as the blobs passed in. The URI will be
            None if raw is False.
        """
        results = []

        def packing_iter():
            # `results` is filled as a side effect while the storage backend
            # consumes this generator.
            for blob in blob_iter:
                if is_transfer:
                    # The incoming metadata is ignored and replaced below.
                    key, blob_data, _ = blob
                    path = self._storage_impl.path_join(self._prefix, key[:2], key)
                    # Transfer data is always raw/decompressed, so mark it as such
                    meta_corrected = {"cas_raw": True, "cas_version": 1}
                    results.append(
                        self.save_blobs_result(
                            uri=self._storage_impl.full_uri(path),
                            key=key,
                        )
                    )
                    yield path, (BytesIO(blob_data), meta_corrected)
                    continue

                # The key is the SHA1 of the content; objects are sharded by
                # the first two hex characters of the key.
                sha = sha1(blob).hexdigest()
                path = self._storage_impl.path_join(self._prefix, sha[:2], sha)
                results.append(
                    self.save_blobs_result(
                        uri=self._storage_impl.full_uri(path) if raw else None,
                        key=sha,
                    )
                )

                if not self._storage_impl.is_file([path])[0]:
                    # only process blobs that don't exist already in the
                    # backing datastore
                    meta = {"cas_raw": raw, "cas_version": 1}
                    if raw:
                        yield path, (BytesIO(blob), meta)
                    else:
                        yield path, (self._pack_v1(blob), meta)

        # We don't actually want to overwrite but by saying =True, we avoid
        # checking again saving some operations. We are already sure we are not
        # sending duplicate files since we already checked.
        self._storage_impl.save_bytes(packing_iter(), overwrite=True, len_hint=len_hint)
        return results

    def load_blobs(self, keys, force_raw=False, is_transfer=False):
        """
        Mirror function of save_blobs

        This function is guaranteed to return the bytes passed to save_blob for
        the keys

        Parameters
        ----------
        keys : List of string
            Key describing the object to load
        force_raw : bool, default False
            Support for backward compatibility with previous datastores. If
            True, this will force the key to be loaded as is (raw). By default,
            False
        is_transfer : bool, default False
            If True, this indicates we are loading blobs to transfer them directly
            to another datastore. We will, in this case, also transfer the metadata
            and do minimal processing. This is for internal use only.

        Returns
        -------
        Returns an iterator of (string, bytes) tuples; the iterator may return keys
        in a different order than were passed in. If is_transfer is True, the tuple
        has three elements with the third one being the metadata.

        Raises
        ------
        DataException
            If the encoding version of an object is missing or unknown, or if
            the object cannot be unpacked.
        """
        load_paths = []
        # First pass: serve what the cache has, remember the rest.
        for key in keys:
            blob = None
            if self._blob_cache:
                blob = self._blob_cache.load_key(key)
            if blob is not None:
                if is_transfer:
                    # Cached blobs are decompressed/processed bytes regardless of original format
                    yield key, blob, {"cas_raw": False, "cas_version": 1}
                else:
                    yield key, blob
            else:
                path = self._storage_impl.path_join(self._prefix, key[:2], key)
                load_paths.append((key, path))

        # Second pass: fetch everything that was not cached.
        with self._storage_impl.load_bytes([p for _, p in load_paths]) as loaded:
            for path_key, file_path, meta in loaded:
                # The key is the last component of the storage path. Error
                # messages below use `path_key` (the object being decoded),
                # not the leftover `path` of the loop above.
                key = self._storage_impl.path_split(path_key)[-1]
                # At this point, we either return the object as is (if raw) or
                # decode it according to the encoding version
                with open(file_path, "rb") as f:
                    if force_raw or (meta and meta.get("cas_raw", False)):
                        blob = f.read()
                    else:
                        if meta is None:
                            # Previous version of the datastore had no meta
                            # information
                            unpack_code = self._unpack_backward_compatible
                        else:
                            version = meta.get("cas_version", -1)
                            if version == -1:
                                raise DataException(
                                    "Could not extract encoding version for '%s'"
                                    % path_key
                                )
                            unpack_code = getattr(self, "_unpack_v%d" % version, None)
                            if unpack_code is None:
                                raise DataException(
                                    "Unknown encoding version %d for '%s' -- "
                                    "the artifact is either corrupt or you "
                                    "need to update Metaflow to the latest "
                                    "version" % (version, path_key)
                                )
                        try:
                            blob = unpack_code(f)
                        except Exception as e:
                            raise DataException(
                                "Could not unpack artifact '%s': %s" % (path_key, e)
                            )

                if self._blob_cache:
                    self._blob_cache.store_key(key, blob)

                if is_transfer:
                    yield key, blob, meta  # Preserve exact original metadata from storage
                else:
                    yield key, blob

    def _unpack_backward_compatible(self, blob):
        # This is the backward compatible unpack
        # (if the blob doesn't have a version encoded)
        return self._unpack_v1(blob)

    def _pack_v1(self, blob):
        # Version 1 encoding: gzip (level 3) into an in-memory buffer that is
        # rewound so the caller can read it from the start.
        buf = BytesIO()
        with gzip.GzipFile(fileobj=buf, mode="wb", compresslevel=3) as f:
            f.write(blob)
        buf.seek(0)
        return buf

    def _unpack_v1(self, blob):
        # `blob` is a binary file-like object holding gzip data.
        with gzip.GzipFile(fileobj=blob, mode="rb") as f:
            return f.read()
class BlobCache(object):
    """
    Do-nothing cache: `load_key` never finds anything and `store_key`
    discards what it is given.
    """

    def load_key(self, key):
        """Return None for every key."""
        return None

    def store_key(self, key, blob):
        """Ignore the blob; nothing is stored."""
        return None
================================================
FILE: metaflow/datastore/datastore_set.py
================================================
import json
from io import BytesIO
from .exceptions import DataException
from .content_addressed_store import BlobCache
"""
TaskDataStoreSet allows you to prefetch multiple (read) datastores into a
cache and lets you access them. As a performance optimization it also lets you
prefetch select data artifacts leveraging a shared cache.
"""
class TaskDataStoreSet(object):
    """
    Collection of the task datastores returned by
    `flow_datastore.get_task_datastores`, indexed by pathspec and by
    pathspec index.

    When `prefetch_data_artifacts` is given, the blobs of those artifacts are
    loaded once up front and installed as a blob cache on
    `flow_datastore.ca_store`.
    """

    def __init__(
        self,
        flow_datastore,
        run_id,
        steps=None,
        pathspecs=None,
        prefetch_data_artifacts=None,
        allow_not_done=False,
        join_type=None,
        orig_flow_datastore=None,
        spin_artifacts=None,
    ):
        self.task_datastores = flow_datastore.get_task_datastores(
            run_id,
            steps=steps,
            pathspecs=pathspecs,
            allow_not_done=allow_not_done,
            join_type=join_type,
            orig_flow_datastore=orig_flow_datastore,
            spin_artifacts=spin_artifacts,
        )

        if prefetch_data_artifacts:
            # Collect the keys of the requested artifacts across all
            # datastores; None stands for an artifact a datastore lacks.
            wanted_keys = set()
            for task_ds in self.task_datastores:
                wanted_keys.update(task_ds.keys_for_artifacts(prefetch_data_artifacts))
            wanted_keys.discard(None)

            # Load them once and share the result with every datastore of
            # this set through the content-addressed store's cache.
            fetched = dict(flow_datastore.ca_store.load_blobs(wanted_keys))
            flow_datastore.ca_store.set_blob_cache(ImmutableBlobCache(fetched))

        self.pathspec_index_cache = {}
        self.pathspec_cache = {}
        # The lookup tables are only populated when not-done tasks are excluded.
        if allow_not_done:
            return
        for task_ds in self.task_datastores:
            self.pathspec_index_cache[task_ds.pathspec_index] = task_ds
            self.pathspec_cache[task_ds.pathspec] = task_ds

    def get_with_pathspec(self, pathspec):
        """Return the datastore registered for `pathspec`, or None."""
        return self.pathspec_cache.get(pathspec)

    def get_with_pathspec_index(self, pathspec_index):
        """Return the datastore registered for `pathspec_index`, or None."""
        return self.pathspec_index_cache.get(pathspec_index)

    def __iter__(self):
        yield from self.task_datastores
"""
This class ensures that blobs that correspond to artifacts that
are common to all datastores in this set are only loaded once
"""
class ImmutableBlobCache(BlobCache):
    """Read-only cache backed by a mapping of preloaded key -> blob."""

    def __init__(self, preloaded):
        self._preloaded = preloaded

    def load_key(self, key):
        """Return the preloaded blob for `key`, or None when absent."""
        blobs = self._preloaded
        return blobs.get(key)

    def store_key(self, key, blob):
        # we cache only preloaded keys, so no need to store anything
        return None
================================================
FILE: metaflow/datastore/datastore_storage.py
================================================
from collections import namedtuple
import re
from .exceptions import DataException
class CloseAfterUse(object):
    """
    Context manager pairing some data with an optional closer.

    Entering the `with` block yields the data; leaving it calls `close()` on
    the closer when one was supplied and is truthy.
    """

    def __init__(self, data, closer=None):
        self.data = data
        self._closer = closer

    def __enter__(self):
        return self.data

    def __exit__(self, exc_type, exc_val, exc_tb):
        closer = self._closer
        if not closer:
            return
        closer.close()
class DataStoreStorage(object):
"""
A DataStoreStorage defines the interface of communication between the
higher-level datastores and the actual storage system.
Both the ContentAddressedStore and the TaskDataStore use these methods to
read/write/list from the actual storage system. These methods are meant to
be low-level; they are in a class to provide better abstraction but this
class itself is not meant to be initialized.
"""
TYPE = None
datastore_root = None
path_rexp = None
list_content_result = namedtuple("list_content_result", "path is_file")
def __init__(self, root=None):
self.datastore_root = root if root else self.datastore_root
@classmethod
def get_datastore_root_from_config(cls, echo, create_on_absent=True):
    """Returns a default choice for datastore_root from metaflow_config

    Not implemented in this base class; it always raises.

    Parameters
    ----------
    echo : function
        Function to use to print out messages
    create_on_absent : bool, optional
        Create the datastore root if it doesn't exist, by default True

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError
@classmethod
def get_datastore_root_from_location(cls, path, flow_name):
"""Extracts the datastore_root location from a path using
a content-addressed store.
NOTE: This leaks some detail of the content-addressed store so not ideal
This method will raise an exception if the flow_name is not as expected
Parameters
----------
path : str
Location from which to extract the datastore root value
flow_name : str
Flow name (for verification purposes)
Returns
-------
str
The datastore_root value that can be used to initialize an instance
of this datastore storage.
Raises
------
DataException
Raised if the path is not a valid path from this datastore.
"""
if cls.path_rexp is None:
cls.path_rexp = re.compile(
cls.path_join(
"(?P.*)",
"(?P[_a-zA-Z][_a-zA-Z0-9]+)",
"data",
"(?P[0-9a-f]{2})",
"(?:r_)?(?P=init)[0-9a-f]{38}",
)
)
m = cls.path_rexp.match(path)
if not m or m.group("flow_name") != flow_name:
raise DataException(
"Location '%s' does not correspond to a valid location for "
"flow '%s'." % (path, flow_name)
)
return m.group("root")
@classmethod
def path_join(cls, *components):
if len(components) == 0:
return ""
component = components[0].rstrip("/")
components = [component] + [c.strip("/") for c in components[1:]]
return "/".join(components)
@classmethod
def path_split(cls, path):
return path.split("/")
@classmethod
def basename(cls, path):
return path.split("/")[-1]
@classmethod
def dirname(cls, path):
return path.rsplit("/", 1)[0]
def full_uri(self, path):
return self.path_join(self.datastore_root, path)
def is_file(self, paths):
    """
    Returns True or False, for each path, depending on whether it refers to
    a valid file-like object

    This method returns False for a path that points to a directory

    Not implemented in this base class; it always raises.

    Parameters
    ----------
    paths : List[string]
        Paths to the objects

    Returns
    -------
    List[bool]

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError
def info_file(self, path):
    """
    Returns a tuple where the first element is True or False depending on
    whether path refers to a valid file-like object (like is_file) and the
    second element is a dictionary of metadata associated with the file or
    None if the file does not exist or there is no metadata.

    Not implemented in this base class; it always raises.

    Parameters
    ----------
    path : string
        Path to the object

    Returns
    -------
    tuple
        (bool, dict)

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError
def size_file(self, path):
    """
    Returns file size at the indicated 'path', or None if file can not be found.

    Not implemented in this base class; it always raises.

    Parameters
    ----------
    path : string
        Path to the object

    Returns
    -------
    Optional[int]
        Size of the file, or None if it cannot be found

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError
def list_content(self, paths):
    """
    Lists what the datastore holds directly under the directories given
    in `paths`.

    This behaves like an 'ls': only the level immediately below each
    directory is listed. Every element is returned with its path and a
    flag telling whether it is a file (when it is not, it is a directory
    that can be listed in turn).

    Returned paths always include the path that was passed in. For
    example, with the files A/b.txt and A/c.txt and the directory A/D,
    listing ['A'] gives:
    [('A/b.txt', True), ('A/c.txt', True), ('A/D', False)]

    Must be provided by concrete storage implementations; this base
    version always raises NotImplementedError.

    Parameters
    ----------
    paths : List[string]
        Directories to list

    Returns
    -------
    List[list_content_result]
        Content of the directory
    """
    raise NotImplementedError()
def save_bytes(self, path_and_bytes_iter, overwrite=False, len_hint=0):
    """
    Creates objects in the datastore from an iterator of (path, obj)
    tuples, where `path` is where the object is stored and `obj` is a
    file-like object from which the bytes are read.

    When `overwrite` is False, objects that already exist are left
    untouched (see the `overwrite` parameter below for how that case is
    to be treated).

    Must be provided by concrete storage implementations; this base
    version always raises NotImplementedError.

    Parameters
    ----------
    path_and_bytes_iter : Iterator[(string, (RawIOBase|BufferedIOBase, metadata))]
        Iterator over objects to store. The first element of the outermost
        tuple is the path to store the bytes at. The second element is
        either a RawIOBase or BufferedIOBase, or a tuple whose first
        element is a RawIOBase or BufferedIOBase and whose second element
        is a dictionary of metadata to associate with the object.
        Metadata keys must be ascii-only strings and values can be
        anything that json.dumps can convert to a string.
        Without metadata, a bare RawIOBase or BufferedIOBase can be passed.
    overwrite : bool
        True if the objects can be overwritten. Defaults to False.
        Even when False, it is NOT an error condition to see an existing
        object: the upload operation is simply not performed.
    len_hint : int
        Estimated number of items produced by the iterator

    Returns
    -------
    None
    """
    raise NotImplementedError()
def load_bytes(self, keys):
    """
    Fetches objects from the datastore.

    Objects may be fetched in parallel: if ordering matters for the
    caller's consistency model, the caller must call this method several
    times in the proper order.

    Must be provided by concrete storage implementations; this base
    version always raises NotImplementedError.

    Parameters
    ----------
    keys : List[string]
        Keys to fetch

    Returns
    -------
    CloseAfterUse :
        A CloseAfterUse meant to be used in a `with` statement. Its data
        is an iterator over (key, file_path, metadata) tuples. file_path
        and metadata are None when the key is missing; metadata is None
        when the object has no metadata, otherwise it is the dictionary
        of metadata associated with the object.
        The file at `file_path` may no longer be accessible outside the
        scope of the returned object.
        Do not rely on the order of the items (use the key found in each
        tuple instead). As many elements are returned as keys were passed
        in, even when keys are duplicated.
    """
    raise NotImplementedError()
================================================
FILE: metaflow/datastore/exceptions.py
================================================
from ..exception import MetaflowException
class DataException(MetaflowException):
    """Error raised by the datastore layer."""

    headline = "Data store error"
class UnpicklableArtifactException(MetaflowException):
    """Error raised when an artifact cannot be serialized with pickle."""

    headline = "Cannot pickle artifact"

    def __init__(self, artifact_name):
        # The message names the offending artifact; no line number applies.
        super().__init__(
            msg='Cannot pickle dump artifact named "%s"' % artifact_name,
            lineno=None,
        )
================================================
FILE: metaflow/datastore/flow_datastore.py
================================================
import itertools
import json
from abc import ABC, abstractmethod
from .. import metaflow_config
from .content_addressed_store import ContentAddressedStore
from .task_datastore import TaskDataStore
from .spin_datastore import SpinTaskDatastore
from ..metaflow_profile import from_start
class FlowDataStore(object):
default_storage_impl = None
def __init__(
    self,
    flow_name,
    environment=None,
    metadata=None,
    event_logger=None,
    monitor=None,
    storage_impl=None,
    ds_root=None,
):
    """
    Builds a flow-level datastore.

    From it, callers obtain TaskDataStore objects (artifacts and metadata
    of a task) and a ContentAddressedStore (packages, etc.).

    Parameters
    ----------
    flow_name : str
        The name of the flow
    environment : MetaflowEnvironment, optional
        Environment this datastore is operating in
    metadata : MetadataProvider, optional
        The metadata provider to use and update if needed, by default None
    event_logger : EventLogger, optional
        EventLogger to use to report events, by default None
    monitor : Monitor, optional
        Monitor to use to measure/monitor events, by default None
    storage_impl : type, optional
        Class of the backing DataStoreStorage; when not provided, the
        class attribute `default_storage_impl` is used
    ds_root : str, optional
        Root for this datastore; when not provided, the default of the
        DataStoreStorage is used

    Raises
    ------
    RuntimeError
        If neither `storage_impl` nor `default_storage_impl` is set.
    """
    impl_cls = storage_impl or self.default_storage_impl
    if impl_cls is None:
        raise RuntimeError("No datastore storage implementation specified")

    backend = impl_cls(ds_root)
    self._storage_impl = backend
    self.TYPE = backend.TYPE

    # Public attributes
    self.flow_name = flow_name
    self.environment = environment
    self.metadata = metadata
    self.logger = event_logger
    self.monitor = monitor

    # Content-addressed blobs live under "<flow_name>/data"
    data_prefix = backend.path_join(self.flow_name, "data")
    self.ca_store = ContentAddressedStore(data_prefix, backend)

    # Private
    self._metadata_cache = None
@property
def datastore_root(self):
    """Root of the underlying storage implementation."""
    backend = self._storage_impl
    return backend.datastore_root
def set_metadata_cache(self, cache):
    """
    Installs `cache` as the metadata cache consulted by get_task_datastore.

    `cache` is expected to offer load_metadata/store_metadata (see
    MetadataCache).
    """
    setattr(self, "_metadata_cache", cache)
def get_task_datastores(
    self,
    run_id=None,
    steps=None,
    pathspecs=None,
    allow_not_done=False,
    attempt=None,
    include_prior=False,
    mode="r",
    join_type=None,
    orig_flow_datastore=None,
    spin_artifacts=None,
):
    """
    Return a list of TaskDataStore for a subset of the tasks.

    We filter the list based on `steps` if non-None.
    Alternatively, `pathspecs` can contain the exact list of pathspec(s)
    (run_id/step_name/task_id) that should be filtered.
    Note: When `pathspecs` is specified, we expect strict consistency and
    not eventual consistency in contrast to other modes.

    Parameters
    ----------
    run_id : str, optional
        Run ID to get the tasks from. If not specified, use pathspecs,
        by default None
    steps : List[str] , optional
        Steps to get the tasks from; when None, all step directories found
        under the run are listed, by default None
    pathspecs : List[str], optional
        Full task specs (run_id/step_name/task_id[/attempt]). Can be used
        instead of specifying run_id and steps, by default None
    allow_not_done : bool, optional
        If True, returns the latest attempt of a task even if that attempt
        wasn't marked as done, by default False
    attempt : int, optional
        Attempt number of the tasks to return. If not provided, returns
        latest attempt.
    include_prior : boolean, default False
        If True, returns all attempts up to and including attempt.
    mode : str, default "r"
        Mode to initialize the returned TaskDataStores in.
    join_type : str, optional, default None
        If specified, the join type for the task. This is used to determine
        the user specified artifacts for the task in case of a spin task.
    orig_flow_datastore : FlowDataStore, optional, default None
        The flow datastore of the original run in case of a spin task
        (its `get_task_datastore` is called). If provided, the returned
        objects are SpinTaskDatastore instead of TaskDataStore.
    spin_artifacts : Dict[str, Any], optional, default None
        Artifacts provided by user that can override the artifacts fetched
        via the spin pathspec.

    Returns
    -------
    List[TaskDataStore]
        Task datastores for all the tasks specified.
    """
    task_urls = []
    # Note: When `pathspecs` is specified, we avoid the potentially
    # eventually consistent `list_content` operation, and directly construct
    # the task_urls list.
    if pathspecs:
        task_urls = [
            self._storage_impl.path_join(self.flow_name, pathspec)
            for pathspec in pathspecs
        ]
    else:
        run_prefix = self._storage_impl.path_join(self.flow_name, run_id)
        if steps:
            step_urls = [
                self._storage_impl.path_join(run_prefix, step) for step in steps
            ]
        else:
            step_urls = [
                step.path
                for step in self._storage_impl.list_content([run_prefix])
                if step.is_file is False
            ]
        task_urls = [
            task.path
            for task in self._storage_impl.list_content(step_urls)
            if task.is_file is False
        ]

    urls = []
    # Parse content urls for a specific attempt only, or for all attempts
    # in the max range. There is no reason to check for attempts greater
    # than MAX_ATTEMPTS, as they do not exist.
    attempt_range = range(metaflow_config.MAX_ATTEMPTS)
    if attempt is not None and attempt <= metaflow_config.MAX_ATTEMPTS - 1:
        attempt_range = range(attempt + 1) if include_prior else [attempt]

    metadata_suffixes = (
        TaskDataStore.METADATA_DATA_SUFFIX,
        TaskDataStore.METADATA_ATTEMPT_SUFFIX,
        TaskDataStore.METADATA_DONE_SUFFIX,
    )
    for task_url in task_urls:
        # task_url can have a trailing slash, so strip this to avoid empty
        # strings in the split
        task_splits = task_url.rstrip("/").split("/")
        # Usually it is flow, run, step, task (so 4 components) -- if we have
        # a fifth one, there is a specific attempt number listed as well.
        task_attempt_range = attempt_range
        metadata_root = task_url
        if len(task_splits) == 5:
            task_attempt_range = [int(task_splits[4])]
            # Metadata files live directly in the task directory
            # (flow/run/step/task/<attempt>.<name>), so the attempt component
            # must not be part of the directory used to build the URLs;
            # otherwise nothing is ever found and the 5-way unpack of the
            # key below could not succeed.
            metadata_root = self._storage_impl.path_join(*task_splits[:4])
        # Use a dedicated loop variable so the `attempt` argument is not
        # clobbered.
        for task_attempt in task_attempt_range:
            for suffix in metadata_suffixes:
                urls.append(
                    self._storage_impl.path_join(
                        metadata_root,
                        TaskDataStore.metadata_name_for_attempt(
                            suffix, task_attempt
                        ),
                    )
                )

    latest_started_attempts = {}
    done_attempts = set()
    data_objs = {}
    with self._storage_impl.load_bytes(urls) as get_results:
        for key, path, _meta in get_results:
            if path is None:
                # Key not present in the datastore
                continue
            _, run, step, task, fname = self._storage_impl.path_split(key)
            found_attempt, fname = TaskDataStore.parse_attempt_metadata(fname)
            found_attempt = int(found_attempt)
            if fname == TaskDataStore.METADATA_DONE_SUFFIX:
                done_attempts.add((run, step, task, found_attempt))
            elif fname == TaskDataStore.METADATA_ATTEMPT_SUFFIX:
                latest_started_attempts[(run, step, task)] = max(
                    latest_started_attempts.get((run, step, task), 0),
                    found_attempt,
                )
            elif fname == TaskDataStore.METADATA_DATA_SUFFIX:
                # This somewhat breaks the abstraction since we are using
                # load_bytes directly instead of load_metadata
                with open(path, encoding="utf-8") as f:
                    data_objs[(run, step, task, found_attempt)] = json.load(f)

    # We now figure out the latest attempt that started *and* finished.
    # Note that if an attempt started but didn't finish, we do *NOT* return
    # the previous attempt
    latest_started_attempts = set(
        (run, step, task, started)
        for (run, step, task), started in latest_started_attempts.items()
    )
    if allow_not_done:
        latest_to_fetch = (
            done_attempts.union(latest_started_attempts)
            if include_prior
            else latest_started_attempts
        )
    else:
        latest_to_fetch = (
            done_attempts
            if include_prior
            else (latest_started_attempts & done_attempts)
        )
    # Positional arguments for get_task_datastore:
    # (run_id, step_name, task_id, attempt, data_metadata, mode,
    #  allow_not_done, join_type, orig_flow_datastore, spin_artifacts)
    latest_to_fetch = [
        (
            v[0],
            v[1],
            v[2],
            v[3],
            data_objs.get(v),
            mode,
            allow_not_done,
            join_type,
            orig_flow_datastore,
            spin_artifacts,
        )
        for v in latest_to_fetch
    ]
    return list(itertools.starmap(self.get_task_datastore, latest_to_fetch))
def get_task_datastore(
    self,
    run_id,
    step_name,
    task_id,
    attempt=None,
    data_metadata=None,
    mode="r",
    allow_not_done=False,
    join_type=None,
    orig_flow_datastore=None,
    spin_artifacts=None,
    persist=True,
):
    """
    Returns the datastore for a single task.

    When `orig_flow_datastore` is given, the task datastore is obtained
    from it and wrapped in a SpinTaskDatastore; otherwise a TaskDataStore
    is created, using the metadata cache (if one was set through
    set_metadata_cache) to avoid reloading the data metadata.

    Parameters
    ----------
    run_id, step_name, task_id : str
        Identify the task.
    attempt : int, optional
        Attempt to open, by default None
    data_metadata : dict, optional
        Already loaded data metadata for the task; when None it may be
        looked up in the metadata cache, by default None
    mode : str, default "r"
        Mode to initialize the TaskDataStore in.
    allow_not_done : bool, default False
        Forwarded to TaskDataStore; the metadata cache is bypassed when True.
    join_type : str, optional
        When set (spin case only), `spin_artifacts` is expected to be keyed
        by "run_id/step_name/task_id" and only the entry for this task is
        used.
    orig_flow_datastore : FlowDataStore, optional
        Flow datastore of the run being spun, by default None
    spin_artifacts : Dict[str, Any], optional
        User-provided artifacts for the spin task; None is treated as
        "no artifacts".
    persist : bool, default True
        Forwarded unchanged to TaskDataStore.

    Returns
    -------
    TaskDataStore or SpinTaskDatastore
    """
    if orig_flow_datastore is not None:
        # In spin step subprocess, use SpinTaskDatastore for accessing artifacts
        if spin_artifacts is None:
            # The default is None: normalize it so that neither the lookup
            # below nor SpinTaskDatastore has to deal with a non-mapping.
            spin_artifacts = {}
        if join_type is not None:
            # If join_type is specified, we need to use the artifacts
            # corresponding to that particular join index, specified by the
            # parent task pathspec.
            spin_artifacts = spin_artifacts.get(
                f"{run_id}/{step_name}/{task_id}", {}
            )
        from_start(
            "FlowDataStore: get_task_datastore for spin task for type %s %s metadata"
            % (self.TYPE, "without" if data_metadata is None else "with")
        )
        # Get the task datastore for the spun task.
        orig_datastore = orig_flow_datastore.get_task_datastore(
            run_id,
            step_name,
            task_id,
            attempt=attempt,
            data_metadata=data_metadata,
            mode=mode,
            allow_not_done=allow_not_done,
            persist=persist,
        )
        return SpinTaskDatastore(
            self.flow_name,
            run_id,
            step_name,
            task_id,
            orig_datastore,
            spin_artifacts,
        )

    # The cache is only usable for a fixed attempt of a done task since that
    # is the only case in which the metadata can no longer change.
    cache_usable = (
        self._metadata_cache is not None
        and attempt is not None
        and allow_not_done is False
    )
    cache_hit = False
    if cache_usable and data_metadata is None:
        # Try to load the metadata from the cache if it is not provided.
        data_metadata = self._metadata_cache.load_metadata(
            run_id, step_name, task_id, attempt
        )
        cache_hit = data_metadata is not None

    from_start(
        "FlowDataStore: get_task_datastore for regular task for type %s %s metadata"
        % (self.TYPE, "without" if data_metadata is None else "with")
    )
    task_datastore = TaskDataStore(
        self,
        run_id,
        step_name,
        task_id,
        attempt=attempt,
        data_metadata=data_metadata,
        mode=mode,
        allow_not_done=allow_not_done,
        persist=persist,
    )

    # Only persist in cache if it is non-changing (so done only) and we have
    # a non-None attempt
    if cache_usable and not cache_hit:
        self._metadata_cache.store_metadata(
            run_id, step_name, task_id, attempt, task_datastore.ds_metadata
        )

    return task_datastore
def save_data(self, data_iter, len_hint=0):
    """Stores raw blobs in the underlying content-addressed store

    Parameters
    ----------
    data_iter : Iterator[bytes]
        Iterator over blobs to save; each item is saved individually.
    len_hint : int
        Estimate of the number of items that will be produced by the
        iterator, by default 0.

    Returns
    -------
    List[(str, str)]
        One (uri, key) tuple per saved blob: the URI to access the saved
        resource and the key needed to retrieve it using load_data. The
        tuples come in the same order as the input.
    """
    stored = self.ca_store.save_blobs(data_iter, raw=True, len_hint=len_hint)
    uri_key_pairs = []
    for outcome in stored:
        uri_key_pairs.append((outcome.uri, outcome.key))
    return uri_key_pairs
def load_data(self, keys, force_raw=False):
    """Fetches blobs from the underlying content-addressed store

    Parameters
    ----------
    keys : List[str]
        Keys to retrieve
    force_raw : bool, optional
        Backward compatible mode. Raw data will be properly identified with
        metadata information but older datastores did not do this. If you
        know the data should be handled as raw data, set this to True,
        by default False

    Returns
    -------
    Iterator[(str, bytes)]
        Iterator over (key, blob) tuples
    """
    loaded = self.ca_store.load_blobs(keys, force_raw=force_raw)
    yield from ((key, blob) for key, blob in loaded)
class MetadataCache(ABC):
    """
    Interface of the cache that FlowDataStore.get_task_datastore uses to
    avoid reloading the data metadata of a task attempt.
    """

    @abstractmethod
    def load_metadata(self, run_id, step_name, task_id, attempt):
        """
        Look up the metadata cached for the given task attempt.

        FlowDataStore treats a None result as a cache miss.
        """
        raise NotImplementedError()

    @abstractmethod
    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
        """Cache `metadata_dict` for the given task attempt."""
        raise NotImplementedError()
================================================
FILE: metaflow/datastore/inputs.py
================================================
class Inputs(object):
    """
    Container for the inputs of a join step. Access patterns:

    split: inputs.step_a.x inputs.step_b.x
    foreach: inputs[0].x
    both: (inp.x for inp in inputs)
    """

    def __init__(self, flows):
        # TODO sort by foreach index
        self.flows = []
        for flow in flows:
            self.flows.append(flow)
            # Expose each input under the name of the step it comes from
            setattr(self, flow._current_step, flow)

    def __getitem__(self, idx):
        """Positional access to the inputs (foreach case)."""
        return self.flows[idx]

    def __iter__(self):
        """Iterates over the inputs in the order they were given."""
        return iter(self.flows)
================================================
FILE: metaflow/datastore/spin_datastore.py
================================================
from typing import Dict, Any
from .task_datastore import TaskDataStore, require_mode
from ..metaflow_profile import from_start
class SpinTaskDatastore(object):
    def __init__(
        self,
        flow_name: str,
        run_id: str,
        step_name: str,
        task_id: str,
        orig_datastore: TaskDataStore,
        spin_artifacts: Dict[str, Any],
    ):
        """
        SpinTaskDatastore is a datastore for a task that is used to retrieve
        artifacts and attributes for a spin step. It uses the task pathspec
        from a previous execution of the step to access the artifacts and
        attributes.

        Parameters:
        -----------
        flow_name : str
            Name of the flow
        run_id : str
            Run ID of the flow
        step_name : str
            Name of the step
        task_id : str
            Task ID of the step
        orig_datastore : TaskDataStore
            The datastore for the underlying task that is being spun.
        spin_artifacts : Dict[str, Any]
            User provided artifacts that are to be used in the spin task.
            This is a dictionary where keys are artifact names and values
            are the actual data or metadata. None is accepted and treated
            as an empty dictionary.
        """
        self.flow_name = flow_name
        self.run_id = run_id
        self.step_name = step_name
        self.task_id = task_id
        self.orig_datastore = orig_datastore
        # FlowDataStore.get_task_datastore defaults spin_artifacts to None;
        # without normalization, __getitem__ would fail with a TypeError that
        # its KeyError fallback does not catch.
        self.spin_artifacts = spin_artifacts if spin_artifacts is not None else {}
        self._task = None

        # Update _objects and _info in order to persist artifacts
        # See `persist` method in `TaskDatastore` for more details
        self._objects = self.orig_datastore._objects.copy()
        self._info = self.orig_datastore._info.copy()
        # We strip out some of the control ones
        for key in ("_transition",):
            if key in self._objects:
                del self._objects[key]
                # _info may legitimately lack an entry for the key
                self._info.pop(key, None)

        from_start("SpinTaskDatastore: Initialized artifacts")

    @require_mode(None)
    def __getitem__(self, name):
        """
        Returns the value for `name`: user-provided spin artifacts take
        precedence, then the original task datastore is consulted.

        Raises
        ------
        KeyError
            If `name` is found in neither place.
        """
        try:
            # Check if it's an artifact in the spin_artifacts
            return self.spin_artifacts[name]
        except KeyError:
            try:
                # Check if it's an attribute of the task
                # _foreach_stack, _foreach_index, ...
                return self.orig_datastore[name]
            except (KeyError, AttributeError) as e:
                raise KeyError(
                    f"Attribute '{name}' not found in the previous execution "
                    f"of the tasks for `{self.step_name}`."
                ) from e

    @require_mode(None)
    def is_none(self, name):
        """Returns True if the value stored under `name` is None."""
        val = self.__getitem__(name)
        return val is None

    @require_mode(None)
    def __contains__(self, name):
        """Returns True if `name` can be resolved through __getitem__."""
        try:
            _ = self.__getitem__(name)
            return True
        except KeyError:
            return False

    @require_mode(None)
    def items(self):
        """
        Returns the (artifact name, key) pairs copied from the original
        datastore, minus the stripped control entries.
        """
        # Always hand back an items view (empty when there is nothing) rather
        # than an empty dict, so the return type does not depend on content.
        return self._objects.items()
================================================
FILE: metaflow/datastore/task_datastore.py
================================================
from collections import defaultdict
import json
import pickle
import sys
import time
from functools import wraps
from io import BufferedIOBase, FileIO, RawIOBase
from typing import List, Optional
from types import MethodType, FunctionType
from .. import metaflow_config
from ..exception import MetaflowInternalError
from ..metadata_provider import DataArtifact, MetaDatum
from ..parameters import Parameter
from ..util import Path, is_stringish, to_fileobj
from .exceptions import DataException, UnpicklableArtifactException
_included_file_type = ""
def only_if_not_done(f):
    """
    Method decorator that refuses to run `f` once the datastore has been
    marked as done (i.e. when `self._is_done_set` is true), raising
    MetaflowInternalError instead.
    """

    @wraps(f)
    def guarded(self, *args, **kwargs):
        if not self._is_done_set:
            return f(self, *args, **kwargs)
        raise MetaflowInternalError(
            "Tried to write to datastore "
            "(method %s) after it was marked "
            ".done()" % f.__name__
        )

    return guarded
def require_mode(mode):
    """
    Builds a method decorator that only lets the call through when the
    datastore is in `mode` (compared with `self._mode`); otherwise
    MetaflowInternalError is raised. A `mode` of None disables the check.
    """

    def wrapper(f):
        @wraps(f)
        def checked(self, *args, **kwargs):
            if mode is None or self._mode == mode:
                return f(self, *args, **kwargs)
            raise MetaflowInternalError(
                "Attempting a datastore operation '%s' requiring mode '%s' "
                "but have mode '%s'" % (f.__name__, mode, self._mode)
            )

        return checked

    return wrapper
class ArtifactTooLarge(object):
    """Placeholder object whose string form reads "< artifact too large >"."""

    _TEXT = "< artifact too large >"

    def __str__(self):
        return self._TEXT
class TaskDataStore(object):
"""
TaskDataStore is obtained through FlowDataStore.get_task_datastore and
is used to store three things:
- Task artifacts (using save_artifacts and load_artifacts) which will
ultimately be stored using ContentAddressedStore's save_blobs and
load_blobs. This is basically the content indexed portion of the
storage (identical objects are stored only once).
- Metadata information (using save_metadata and load_metadata) which
stores JSON encoded metadata about a task in a non-content indexed
way in a hierarchical manner (ie: the files are stored
in a path indicated by the pathspec (run_id/step_name/task_id)).
This portion of the store can be viewed as name indexed (storing
two metadata items with the same name will overwrite the previous item
so the condition of equality is the name as
opposed to the content).
- Logs which are a special sort of task metadata but are handled
differently (they are not JSON-encodable dictionaries).
"""
METADATA_ATTEMPT_SUFFIX = "attempt.json"
METADATA_DONE_SUFFIX = "DONE.lock"
METADATA_DATA_SUFFIX = "data.json"
@staticmethod
def metadata_name_for_attempt(name, attempt):
    """
    Returns the metadata file name for `name` at the given attempt:
    "<attempt>.<name>", or `name` unchanged when `attempt` is None.
    """
    if attempt is not None:
        return "%d.%s" % (attempt, name)
    return name
@staticmethod
def parse_attempt_metadata(name):
    """
    Splits a metadata file name at its first "." and returns the resulting
    list, e.g. "0.data.json" gives ["0", "data.json"].
    """
    pieces = name.split(".", maxsplit=1)
    return pieces
def __init__(
    self,
    flow_datastore,
    run_id,
    step_name,
    task_id,
    attempt=None,
    data_metadata=None,
    mode="r",
    allow_not_done=False,
    persist=True,
):
    """
    Opens the datastore of one task.

    Parameters
    ----------
    flow_datastore : FlowDataStore
        Parent datastore; provides the storage implementation, the
        content-addressed store, the environment and the metadata provider.
    run_id, step_name, task_id : str
        Identify the task; its files live under
        flow_name/run_id/step_name/task_id.
    attempt : int, optional
        Attempt to open. In modes 'r' and 'd', when None, the latest
        attempt found in the datastore is used.
    data_metadata : dict, optional
        Already loaded data metadata ("objects" and "info" entries); only
        used in mode 'r', where it avoids any lookup in the storage.
    mode : str, default "r"
        'w' (write, starts empty), 'r' (read) or 'd' (destructive).
    allow_not_done : bool, default False
        Allows opening an attempt that has no done marker.
    persist : bool, default True
        Stored as-is on the instance.

    Raises
    ------
    DataException
        If the mode is unknown, or if a done attempt is required but none
        is found.
    """
    self._storage_impl = flow_datastore._storage_impl
    self.TYPE = self._storage_impl.TYPE
    self._ca_store = flow_datastore.ca_store
    self._environment = flow_datastore.environment
    self._run_id = run_id
    self._step_name = step_name
    self._task_id = task_id
    self._path = self._storage_impl.path_join(
        flow_datastore.flow_name, run_id, step_name, task_id
    )
    self._mode = mode
    self._attempt = attempt
    self._metadata = flow_datastore.metadata
    self._parent = flow_datastore
    self._persist = persist

    # The GZIP encodings are for backward compatibility
    self._encodings = {"pickle-v2", "gzip+pickle-v2"}
    # pickle protocol 4 based encodings are only enabled on Python >= 3.6
    if sys.version_info >= (3, 6):
        self._encodings.add("pickle-v4")
        self._encodings.add("gzip+pickle-v4")

    self._is_done_set = False

    # If the mode is 'write', we initialize things to empty
    if self._mode == "w":
        self._objects = {}
        self._info = {}
    elif self._mode == "r":
        if data_metadata is not None:
            # We already loaded the data metadata so just use that
            self._objects = data_metadata.get("objects", {})
            self._info = data_metadata.get("info", {})
        else:
            # Start from empty so that the attributes always exist, including
            # when a not-done attempt is opened with allow_not_done=True (in
            # which case there is no data metadata to load). Without this,
            # later accesses fail with AttributeError instead of the
            # intended, explicit errors.
            self._objects = {}
            self._info = {}
            # What is the latest attempt ID for this task store.
            # NOTE: We *only* access to the data if the attempt that
            # produced it is done. In particular, we do not allow access to
            # a past attempt if a new attempt has started to avoid
            # inconsistencies (depending on when the user accesses the
            # datastore, the data may change). We make an exception to that
            # rule when allow_not_done is True which allows access to things
            # like logs even for tasks that did not write a done marker
            max_attempt = None
            for i in range(metaflow_config.MAX_ATTEMPTS):
                check_meta = self._metadata_name_for_attempt(
                    self.METADATA_ATTEMPT_SUFFIX, i
                )
                if self.has_metadata(check_meta, add_attempt=False):
                    max_attempt = i
                elif max_attempt is not None:
                    # Attempts are contiguous: stop at the first gap
                    break
            if self._attempt is None:
                self._attempt = max_attempt
            elif max_attempt is None or self._attempt > max_attempt:
                # In this case the attempt does not exist, so we can't load
                # anything
                return

            # Check if the latest attempt was completed successfully except
            # if we have allow_not_done
            data_obj = None
            if self.has_metadata(self.METADATA_DONE_SUFFIX):
                data_obj = self.load_metadata([self.METADATA_DATA_SUFFIX])
                data_obj = data_obj[self.METADATA_DATA_SUFFIX]
            elif self._attempt is None or not allow_not_done:
                raise DataException(
                    "No completed attempts of the task was found for task '%s'"
                    % self._path
                )

            if data_obj is not None:
                self._objects = data_obj.get("objects", {})
                self._info = data_obj.get("info", {})
    elif self._mode == "d":
        self._objects = {}
        self._info = {}

        if self._attempt is None:
            # Keep the highest attempt for which attempt metadata exists
            for i in range(metaflow_config.MAX_ATTEMPTS):
                check_meta = self._metadata_name_for_attempt(
                    self.METADATA_ATTEMPT_SUFFIX, i
                )
                if self.has_metadata(check_meta, add_attempt=False):
                    self._attempt = i

        # Do not allow destructive operations on the datastore if attempt is
        # still in flight and we explicitly did not allow operating on
        # running tasks.
        if not allow_not_done and not self.has_metadata(self.METADATA_DONE_SUFFIX):
            raise DataException(
                "No completed attempts of the task was found for task '%s'"
                % self._path
            )
    else:
        raise DataException("Unknown datastore mode: '%s'" % self._mode)
@property
def pathspec(self):
    """Task pathspec in the form "run_id/step_name/task_id"."""
    parts = (self.run_id, self.step_name, self.task_id)
    return "/".join(parts)
@property
def run_id(self):
    """Run ID this task datastore was opened with."""
    value = self._run_id
    return value
@property
def step_name(self):
    """Step name this task datastore was opened with."""
    value = self._step_name
    return value
@property
def task_id(self):
    """Task ID this task datastore was opened with."""
    value = self._task_id
    return value
@property
def attempt(self):
    """Attempt this datastore refers to (may be None)."""
    value = self._attempt
    return value
@property
def ds_metadata(self):
    """
    Data metadata of the task as a dictionary with shallow copies of the
    "objects" and "info" mappings.
    """
    snapshot = {}
    snapshot["objects"] = self._objects.copy()
    snapshot["info"] = self._info.copy()
    return snapshot
@property
def pathspec_index(self):
    """
    "run_id/step_name[i,j,...]" built from the `index` of every frame of the
    "_foreach_stack" artifact; when an "_iteration_stack" artifact exists,
    its values are appended in a second bracket group.
    """
    foreach_part = ",".join(str(frame.index) for frame in self["_foreach_stack"])
    if "_iteration_stack" not in self:
        return "%s/%s[%s]" % (self._run_id, self._step_name, foreach_part)
    iteration_part = ",".join(str(entry) for entry in self["_iteration_stack"])
    return "%s/%s[%s][%s]" % (
        self._run_id,
        self._step_name,
        foreach_part,
        iteration_part,
    )
@property
def parent_datastore(self):
    """The FlowDataStore this task datastore was created from."""
    owner = self._parent
    return owner
@require_mode(None)
def get_log_location(self, logprefix, stream):
    """
    Returns the full URI of the log file for `logprefix` and `stream`,
    located in this task's directory and named for the current attempt.
    """
    attempt_log_name = self._metadata_name_for_attempt(
        self._get_log_location(logprefix, stream)
    )
    relative_path = self._storage_impl.path_join(self._path, attempt_log_name)
    return self._storage_impl.full_uri(relative_path)
@require_mode("r")
def keys_for_artifacts(self, names):
    """
    Returns, in order, the stored key of each artifact in `names`; None is
    returned for names that are not known to this datastore.

    This method requires mode 'r'.
    """
    lookup = self._objects.get
    return list(map(lookup, names))
@only_if_not_done
@require_mode("w")
def init_task(self):
    """
    Initializes the datastore with a new attempt by saving the attempt
    metadata, which records the current time.

    This method requires mode 'w'.
    """
    attempt_record = {"time": time.time()}
    self.save_metadata({self.METADATA_ATTEMPT_SUFFIX: attempt_record})
@only_if_not_done
@require_mode("w")
def transfer_artifacts(
    self, other_datastore: "TaskDataStore", names: Optional[List[str]] = None
):
    """
    Copies the blobs from other_datastore to this datastore if the datastore
    roots are different.

    This is used specifically for spin so we can bring in artifacts from the
    original datastore.

    This method requires mode 'w'.

    Parameters
    ----------
    other_datastore : TaskDataStore
        Other datastore from which to copy artifacts from
    names : List[str], optional, default None
        If provided, only transfer the artifacts with these names (names
        unknown to the other datastore are ignored). If None, transfer all
        artifacts from the other datastore.
    """
    if (
        other_datastore.TYPE == self.TYPE
        and other_datastore._storage_impl.datastore_root
        == self._storage_impl.datastore_root
    ):
        # Nothing to transfer -- artifacts are already saved properly
        return

    # Determine which artifacts need to be transferred
    if names is None:
        # Transfer all artifacts from other datastore
        artifacts_to_transfer = list(other_datastore._objects.keys())
    else:
        # Transfer only specified artifacts
        artifacts_to_transfer = [
            name for name in names if name in other_datastore._objects
        ]
    if not artifacts_to_transfer:
        return

    # Get SHA keys for artifacts to transfer. Several artifacts can share
    # the same content (and therefore the same key): de-duplicate, keeping
    # first-seen order, so each blob is checked and copied only once.
    shas_to_transfer = list(
        dict.fromkeys(
            other_datastore._objects[name] for name in artifacts_to_transfer
        )
    )

    # Check which blobs are missing locally. is_file takes a list of paths
    # and returns a list of booleans, so a single call covers every blob.
    local_storage = self._ca_store._storage_impl
    local_paths = [
        local_storage.path_join(self._ca_store._prefix, sha[:2], sha)
        for sha in shas_to_transfer
    ]
    present = local_storage.is_file(local_paths)
    missing_shas = [
        sha for sha, exists in zip(shas_to_transfer, present) if not exists
    ]
    if not missing_shas:
        return  # All blobs already exist locally

    # Load blobs from other datastore in transfer mode
    transfer_blobs = other_datastore._ca_store.load_blobs(
        missing_shas, is_transfer=True
    )
    # Save blobs to local datastore in transfer mode
    self._ca_store.save_blobs(transfer_blobs, is_transfer=True)
@only_if_not_done
@require_mode("w")
def save_artifacts(self, artifacts_iter, len_hint=0):
    """
    Persists Metaflow artifacts (Python objects) in the datastore together
    with the information needed to read them back.

    Objects are pickled here and handed to the content-addressed store.
    Artifacts should only be read back through load_artifacts.

    This method requires mode 'w'.

    Parameters
    ----------
    artifacts_iter : Iterator[(string, object)]
        Iterator over the human-readable name of the object to save
        and the object itself
    len_hint : integer
        Estimated number of items in artifacts_iter

    Raises
    ------
    UnpicklableArtifactException
        If pickling an object raises TypeError.
    DataException
        If an object is too large for pickle protocol 2 (only when the
        protocol 4 encoding is not available).
    """
    saved_names = []

    def _serialize(name, obj):
        # Returns (blob, encoding name), preferring pickle protocol 4
        if "gzip+pickle-v4" in self._encodings:
            try:
                return pickle.dumps(obj, protocol=4), "gzip+pickle-v4"
            except TypeError as e:
                raise UnpicklableArtifactException(name) from e
        try:
            return pickle.dumps(obj, protocol=2), "gzip+pickle-v2"
        except (SystemError, OverflowError) as e:
            raise DataException(
                "Artifact *%s* is very large (over 2GB). "
                "You need to use Python 3.6 or newer if you want to "
                "serialize large objects." % name
            ) from e
        except TypeError as e:
            raise UnpicklableArtifactException(name) from e

    def pickle_iter():
        # Side effects happen as blobs are consumed: `_info` is filled in and
        # the name is recorded so that it can be matched with the save result.
        for name, obj in artifacts_iter:
            blob, encoding = _serialize(name, obj)
            self._info[name] = {
                "size": len(blob),
                "type": str(type(obj)),
                "encoding": encoding,
            }
            saved_names.append(name)
            yield blob

    # Use the content-addressed store to store all artifacts
    save_result = self._ca_store.save_blobs(pickle_iter(), len_hint=len_hint)
    for name, result in zip(saved_names, save_result):
        self._objects[name] = result.key
@require_mode(None)
def load_artifacts(self, names):
    """
    Counterpart of save_artifacts.

    Fetches the objects referenced by `names` and yields each of them.
    The returned objects may not be identical to the ones that were saved
    (taking into account possible environment changes, for example
    different conda environments) but they can be used in place of the
    objects that were passed to save_artifacts.

    This method can be used in both 'r' and 'w' mode. For the latter use
    case, this can happen when `passdown_partial` is called and an artifact
    passed down that way is then loaded.

    Parameters
    ----------
    names : List[string]
        List of artifacts to retrieve

    Returns
    -------
    Iterator[(string, object)] :
        An iterator over objects retrieved.

    Raises
    ------
    DataException
        If the datastore has no artifact information, or if an artifact
        uses an encoding this Python version cannot read.
    """
    if not self._info:
        raise DataException(
            "Datastore for task '%s' does not have the required metadata to "
            "load artifacts" % self._path
        )
    names_by_key = defaultdict(list)
    for name in names:
        info = self._info.get(name)
        # We use gzip+pickle-v2 as this is the oldest/most compatible.
        # This datastore will always include the proper encoding version so
        # this is just to be able to read very old artifacts
        encode_type = (
            info.get("encoding", "gzip+pickle-v2") if info else "gzip+pickle-v2"
        )
        if encode_type not in self._encodings:
            raise DataException(
                "Python 3.6 or later is required to load artifact '%s'" % name
            )
        names_by_key[self._objects[name]].append(name)
    # At this point, we load what we don't have from the CAS
    # We assume that if we have one "old" style artifact, all of them are
    # like that which is an easy assumption to make since artifacts are all
    # stored by the same implementation of the datastore for a given task.
    for key, blob in self._ca_store.load_blobs(names_by_key.keys()):
        for artifact_name in names_by_key[key]:
            # We unpickle everytime to have fully distinct objects (the user
            # would not expect two artifacts with different names to actually
            # be aliases of one another)
            yield artifact_name, pickle.loads(blob)
@require_mode("r")
def get_artifact_sizes(self, names):
    """
    Retrieves file sizes of artifacts defined in 'names' from their
    respective stored file metadata.

    Usage restricted to only 'r' mode due to depending on the metadata being
    written

    Parameters
    ----------
    names : List[string]
        List of artifacts to retrieve

    Returns
    -------
    Iterator[(string, int)] :
        An iterator over sizes retrieved. 0 is reported when no size
        information is recorded for an artifact.
    """
    for name in names:
        # Very old artifacts may have no info entry at all (load_artifacts
        # tolerates this as well) or an entry without a "type": treat both
        # like an entry with no recorded size instead of crashing.
        info = self._info.get(name) or {}
        if info.get("type") == _included_file_type:
            # For included files, the size is read from the artifact itself
            sz = self[name].size
        else:
            sz = info.get("size", 0)
        yield name, sz
@require_mode("r")
def get_legacy_log_size(self, stream):
    """
    Returns what the storage reports as the size of the legacy
    "<stream>.log" file of the current attempt.

    This method requires mode 'r'.
    """
    legacy_name = "%s.log" % stream
    attempt_name = self._metadata_name_for_attempt(legacy_name)
    return self._storage_impl.size_file(
        self._storage_impl.path_join(self._path, attempt_name)
    )
@require_mode("r")
def get_log_size(self, logsources, stream):
    """
    Returns the total size of the logs of `stream` over all `logsources`.
    Sources for which the storage reports no size (None) are ignored.

    This method requires mode 'r'.
    """
    # Build the path of every single log source first
    log_paths = [
        self._storage_impl.path_join(
            self._path,
            self._metadata_name_for_attempt(
                self._get_log_location(source, stream)
            ),
        )
        for source in logsources
    ]
    reported = [self._storage_impl.size_file(log_path) for log_path in log_paths]
    total = 0
    for size in reported:
        if size is not None:
            total = total + size
    return total
@only_if_not_done
@require_mode("w")
def save_metadata(self, contents, allow_overwrite=True, add_attempt=True):
    """
    Saves task metadata. Much like save_artifacts, this takes a dictionary
    whose keys are the names of the metadata to save and whose values are
    the metadata themselves.

    Metadata is not stored in the CAS but directly in the TaskDataStore,
    after being JSON-encoded as UTF-8.

    This method requires mode 'w'

    Parameters
    ----------
    contents : Dict[string -> JSON-ifiable objects]
        Dictionary of metadata to store
    allow_overwrite : boolean, optional
        If True, allows the overwriting of the metadata, defaults to True
    add_attempt : boolean, optional
        If True, adds the attempt identifier to the metadata. defaults to
        True
    """
    encoded = {}
    for name, value in contents.items():
        encoded[name] = json.dumps(value).encode("utf-8")
    return self._save_file(encoded, allow_overwrite, add_attempt)
@require_mode("w")
def _dangerous_save_metadata_post_done(
    self, contents, allow_overwrite=True, add_attempt=True
):
    """
    Same as save_metadata BUT WITHOUT THE CHECK ON DONE.

    @warning Do not use this unless you know what you are doing. It writes
    metadata to a datastore that has already been marked as done, while other
    parts of metaflow assume a done datastore is read-only.

    Currently only used when the task is executed remotely and no (remote)
    metadata service is configured: the datastore is then used to share
    metadata between the task and the Metaflow local scheduler, because the
    regular way of saving metadata could not be used given other constraints
    and the current plugin API.

    This method requires mode 'w'

    Parameters
    ----------
    contents : Dict[string -> JSON-ifiable objects]
        Metadata to store, keyed by metadata name
    allow_overwrite : boolean, optional
        If True, allows the overwriting of the metadata, defaults to True
    add_attempt : boolean, optional
        If True, adds the attempt identifier to the metadata. defaults to
        True
    """
    encoded = {}
    for key, value in contents.items():
        encoded[key] = json.dumps(value).encode("utf-8")
    return self._save_file(encoded, allow_overwrite, add_attempt)
@require_mode("r")
def load_metadata(self, names, add_attempt=True):
    """
    Load and JSON-decode metadata previously written with `save_metadata`.

    Parameters
    ----------
    names : List[string]
        The name of the metadata elements to load
    add_attempt : bool, optional
        Adds the attempt identifier to the metadata name if True,
        by default True

    Returns
    -------
    Dict: string -> JSON decoded object
        Results indexed by the name of the metadata loaded; entries whose
        file content is None map to None.
    """
    # Before Python 3.6 the raw bytes are decoded to text before json.loads.
    needs_decoding = sys.version_info < (3, 6)
    decoded = {}
    for key, raw in self._load_file(names, add_attempt).items():
        if raw is None:
            decoded[key] = None
        elif needs_decoding:
            decoded[key] = json.loads(raw.decode("utf-8"))
        else:
            decoded[key] = json.loads(raw)
    return decoded
@require_mode(None)
def has_metadata(self, name, add_attempt=True):
    """
    Tell whether the requested metadata file exists in this TaskDataStore.

    TODO: Should we make this take multiple names like the other calls?

    This method operates like load_metadata in both 'w' and 'r' modes.

    Parameters
    ----------
    name : string
        Metadata name to look for
    add_attempt : bool, optional
        Adds the attempt identifier to the metadata name if True,
        by default True

    Returns
    -------
    boolean
        True if the metadata exists or False otherwise
    """
    file_name = self._metadata_name_for_attempt(name) if add_attempt else name
    path = self._storage_impl.path_join(self._path, file_name)
    # is_file works on a list of paths; only one is queried here.
    return self._storage_impl.is_file([path])[0]
@require_mode(None)
def get(self, name, default=None):
    """
    Fetch a single artifact by name, falling back to `default`.

    `default` is returned when no objects are recorded, when `name` is not
    among them, or when loading raises DataException.

    NOTE(review): earlier documentation stated that this method requires
    mode 'r', yet it is decorated with require_mode(None) -- confirm.

    Parameters
    ----------
    name : str
        Name of the object to get
    default : object, optional
        Returns this value if object not found, by default None
    """
    if not self._objects:
        return default
    try:
        if name in self._objects:
            return self[name]
        return default
    except DataException:
        return default
@require_mode("r")
def is_none(self, name):
    """
    Convenience check for whether an artifact is None.

    This method requires mode 'r'.

    Parameters
    ----------
    name : string
        Name of the artifact
    """
    if not self._info:
        return True
    artifact_info = self._info.get(name)
    # Fast path: trust the recorded type only when it matches this
    # interpreter's spelling of NoneType. An artifact stored as None under a
    # different Python version (e.g. Py2 read from Py3) will not match and
    # deliberately falls through to the slow path below (being conservative).
    if artifact_info and artifact_info.get("type") == str(type(None)):
        return True
    # Slow path: the object has to be fetched from the datastore.
    return self.get(name) is None
@only_if_not_done
@require_mode("w")
def done(self):
    """
    Mark this task-datastore as 'done' for the current attempt.

    Writes the data record (objects and info for this attempt) together with
    the done marker, then, if a metadata provider is set, registers an
    "attempt-done" datum and the list of data artifacts with it.

    Will throw an exception if mode != 'w'
    """
    data_record = {
        "datastore": self.TYPE,
        "version": "1.0",
        "attempt": self._attempt,
        "python_version": sys.version,
        "objects": self._objects,
        "info": self._info,
    }
    self.save_metadata(
        {
            self.METADATA_DATA_SUFFIX: data_record,
            self.METADATA_DONE_SUFFIX: "",
        }
    )

    if self._metadata:
        attempt_done = MetaDatum(
            field="attempt-done",
            value=str(self._attempt),
            type="attempt-done",
            tags=["attempt_id:{0}".format(self._attempt)],
        )
        self._metadata.register_metadata(
            self._run_id, self._step_name, self._task_id, [attempt_done]
        )

        artifacts = []
        for var, sha in self._objects.items():
            artifacts.append(
                DataArtifact(
                    name=var,
                    ds_type=self.TYPE,
                    ds_root=self._storage_impl.datastore_root,
                    url=None,
                    sha=sha,
                    type=self._info[var]["encoding"],
                )
            )
        self._metadata.register_data_artifacts(
            self.run_id, self.step_name, self.task_id, self._attempt, artifacts
        )

    self._is_done_set = True
@only_if_not_done
@require_mode("w")
def clone(self, origin):
    """
    Make this datastore point at the artifact records of `origin`.

    The objects and info mappings of `origin` are assigned as-is (they are
    not copied).

    Parameters
    ----------
    origin : TaskDataStore
        TaskDataStore to clone
    """
    self._objects, self._info = origin._objects, origin._info
@only_if_not_done
@require_mode("w")
def passdown_partial(self, origin, variables):
    """
    Record in this datastore the entries of `origin` for the given variables.

    Nothing is downloaded: only the object reference and info of each
    variable are copied over. This is used to propagate parameters between
    datastores without actually loading them, as well as for merge_artifacts.
    Variables for which `origin` has no (truthy) object reference are skipped.

    Parameters
    ----------
    origin : TaskDataStore
        Datastore to take the records from
    variables : Iterable[string]
        Names of the variables to pass down
    """
    for var in variables:
        sha = origin._objects.get(var)
        if not sha:
            continue
        self._objects[var] = sha
        self._info[var] = origin._info[var]
@only_if_not_done
@require_mode("w")
def persist(self, flow):
    """
    Save the artifacts currently held as attributes of `flow`.

    NOTE: This is a DESTRUCTIVE operation: artifacts are deleted from the
    flow object while they are being saved in order to conserve memory. Do
    not rely on artifact attributes of `flow` after calling this function.

    Parameters
    ----------
    flow : FlowSpec
        Flow to persist
    """
    if not self._persist:
        return

    if flow._datastore:
        self._objects.update(flow._datastore._objects)
        self._info.update(flow._datastore._info)

    # Scan the flow object FIRST to find out what counts as an artifact.
    non_artifact_types = (MethodType, FunctionType, Parameter)
    flow_cls = flow.__class__
    pending = []
    seen_names = set()
    for attr in dir(flow):
        if attr.startswith("__") or attr in flow._EPHEMERAL:
            continue
        # Properties of the class (Parameters or class variables) are skipped.
        if hasattr(flow_cls, attr) and isinstance(getattr(flow_cls, attr), property):
            continue
        value = getattr(flow, attr)
        if isinstance(value, non_artifact_types):
            continue
        pending.append((attr, value))
        seen_names.add(attr)

    # Transfer ONLY the parent artifacts that are not being overridden.
    if hasattr(flow._datastore, "orig_datastore"):
        untouched = set(flow._datastore._objects.keys()) - seen_names
        if untouched:
            self.transfer_artifacts(
                flow._datastore.orig_datastore, names=list(untouched)
            )

    def drain_pending():
        # The list is consumed destructively so that no reference to an
        # artifact outlives its encoding: original and encoded artifacts
        # should not sit in memory at the same time.
        while pending:
            attr, value = pending.pop()
            # 'name' and anything starting with '_' are used by the Metaflow
            # runtime and therefore stay on the flow object.
            runtime_owned = attr.startswith("_") or attr == "name"
            if not runtime_owned:
                # NOTE: Destructive mutation of the flow object.
                delattr(flow, attr)
            yield attr, value

    # Save current artifacts
    self.save_artifacts(drain_pending(), len_hint=len(pending))
@only_if_not_done
@require_mode("w")
def save_logs(self, logsource, stream_data):
    """
    Save the log content of several streams coming from one log source.

    Parameters
    ----------
    logsource : string
        Identifies the source of the stream (runtime, task, etc)
    stream_data : Dict[string -> bytes or Path]
        Maps the type of the stream ('stderr', 'stdout') to its content:
        either bytes or a Path from which to stream the log.
    """
    to_store = {}
    for stream, data in stream_data.items():
        location = self._get_log_location(logsource, stream)
        # A Path is wrapped in a file object opened for reading; any other
        # value is stored as given.
        to_store[location] = (
            FileIO(str(data), mode="r") if isinstance(data, Path) else data
        )
    self._save_file(to_store)
@require_mode("d")
def scrub_logs(self, logsources, stream, attempt_override=None):
    """
    Overwrite existing log files of `stream` with a redaction marker.

    Both the per-source log files and the legacy "<stream>.log" file are
    considered; only those that exist are rewritten, each with the content
    "[REDACTED <source> <stream>]".

    Parameters
    ----------
    logsources : List[string]
        Log sources whose files should be scrubbed
    stream : string
        Name of the stream
    attempt_override : int, optional
        Attempt to use instead of this datastore's attempt, by default None
    """
    source_by_path = {}
    for source in logsources:
        log_name = self._metadata_name_for_attempt(
            self._get_log_location(source, stream),
            attempt_override=attempt_override,
        )
        source_by_path[log_name] = source

    # Legacy log paths: the stream name doubles as the "source" label.
    legacy_name = self._metadata_name_for_attempt(
        "%s.log" % stream, attempt_override
    )
    source_by_path[legacy_name] = stream

    present = [
        log_name
        for log_name in source_by_path
        if self.has_metadata(log_name, add_attempt=False)
    ]

    # Replace log contents with [REDACTED source stream]
    redacted = {}
    for log_name in present:
        marker = "[REDACTED %s %s]" % (source_by_path[log_name], stream)
        redacted[log_name] = bytes(marker, "utf-8")

    self._save_file(redacted, add_attempt=False, allow_overwrite=True)
@require_mode("r")
def load_log_legacy(self, stream, attempt_override=None):
    """
    Load the old-style (pre-mflog) log file of `stream` as a bytes object.

    Returns b"" when the loaded content is None.
    """
    legacy_name = self._metadata_name_for_attempt(
        "%s.log" % stream, attempt_override
    )
    content = self._load_file([legacy_name], add_attempt=False)[legacy_name]
    if content is None:
        return b""
    return content
@require_mode("r")
def load_logs(self, logsources, stream, attempt_override=None):
    """
    Load the log files of `stream` for each of the given log sources.

    Parameters
    ----------
    logsources : List[string]
        Log sources to load
    stream : string
        Name of the stream
    attempt_override : int, optional
        Attempt to use instead of this datastore's attempt, by default None

    Returns
    -------
    List[(string, bytes)]
        Pairs of (log source, content); content is b"" when the loaded
        content is None.
    """
    source_by_name = {}
    for source in logsources:
        log_name = self._metadata_name_for_attempt(
            self._get_log_location(source, stream),
            attempt_override=attempt_override,
        )
        source_by_name[log_name] = source

    loaded = self._load_file(source_by_name.keys(), add_attempt=False)

    logs = []
    for log_name, content in loaded.items():
        logs.append((source_by_name[log_name], b"" if content is None else content))
    return logs
@require_mode(None)
def items(self):
    """
    Return the (name, object reference) pairs recorded in this datastore,
    or an empty dict when nothing is recorded.
    """
    if not self._objects:
        return {}
    return self._objects.items()
@require_mode(None)
def to_dict(self, show_private=False, max_value_size=None, include=None):
    """
    Build a dictionary of artifact name -> loaded artifact.

    Parameters
    ----------
    show_private : bool, optional
        Include artifacts whose name starts with "_", by default False
    max_value_size : int, optional
        When set, artifacts whose size is 0 or larger than this value are
        replaced by an ArtifactTooLarge placeholder, by default None
    include : Container[string], optional
        When truthy, only artifacts named in it are returned, by default None

    Returns
    -------
    Dict: string -> object
    """
    result = {}
    for key, _ in self.items():
        if include and key not in include:
            continue
        if key[0] == "_" and not show_private:
            continue

        info = self._info[key]
        if max_value_size is not None:
            if info["type"] == _included_file_type:
                size = self[key].size
            else:
                size = info.get("size", 0)
            if size == 0 or size > max_value_size:
                result[key] = ArtifactTooLarge()
                continue

        value = self[key]
        if info["type"] == _included_file_type:
            # Included files are decoded (with the artifact name) before
            # being returned.
            value = value.decode(key)
        result[key] = value
    return result
@require_mode("r")
def format(self, **kwargs):
    """
    Render the artifacts of this datastore as text, one line per artifact,
    sorted by artifact name.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to `to_dict` (show_private, max_value_size,
        include).

    Returns
    -------
    string
        Lines of the form "*name* [size: S type: T] = value" joined by
        newlines.
    """

    def lines():
        for k, v in self.to_dict(**kwargs).items():
            info = self._info[k]
            if info["type"] == _included_file_type:
                sz = self[k].size
            else:
                # Artifacts without a recorded size are reported as 0, the
                # same fallback used by to_dict and get_artifact_sizes,
                # instead of failing with a KeyError.
                sz = info.get("size", 0)
            yield k, "*{key}* [size: {size} type: {type}] = {value}".format(
                key=k, value=v, size=sz, type=info["type"]
            )

    return "\n".join(line for k, line in sorted(lines()))
@require_mode(None)
def __contains__(self, name):
    """Return True if `name` is one of the recorded artifacts."""
    if not self._objects:
        return False
    return name in self._objects
@require_mode(None)
def __getitem__(self, name):
    """Load and return the single artifact called `name`."""
    # load_artifacts yields (name, object) pairs; only the first is needed.
    loaded_name, loaded_obj = next(self.load_artifacts([name]))
    return loaded_obj
@require_mode("r")
def __iter__(self):
    """Iterate over the names of the recorded artifacts (possibly none)."""
    return iter(self._objects) if self._objects else iter([])
@require_mode("r")
def __str__(self):
    """
    Text rendering of the datastore: private artifacts are shown and values
    are subject to a max_value_size of 1000.
    """
    rendered = self.format(show_private=True, max_value_size=1000)
    return rendered
def _metadata_name_for_attempt(self, name, attempt_override=None):
return self.metadata_name_for_attempt(
name, self._attempt if attempt_override is None else attempt_override
)
@staticmethod
def _get_log_location(logprefix, stream):
return "%s_%s.log" % (logprefix, stream)
def _save_file(self, contents, allow_overwrite=True, add_attempt=True):
"""
Saves files in the directory for this TaskDataStore. This can be
metadata, a log file or any other data that doesn't need to (or
shouldn't) be stored in the Content Addressed Store.
Parameters
----------
contents : Dict[string -> stringish or RawIOBase or BufferedIOBase]
Dictionary of file to store
allow_overwrite : boolean, optional
If True, allows the overwriting of the metadata, defaults to True
add_attempt : boolean, optional
If True, adds the attempt identifier to the metadata,
defaults to True
"""
def blob_iter():
for name, value in contents.items():
if add_attempt:
path = self._storage_impl.path_join(
self._path, self._metadata_name_for_attempt(name)
)
else:
path = self._storage_impl.path_join(self._path, name)
if isinstance(value, (RawIOBase, BufferedIOBase)) and value.readable():
yield path, value
elif is_stringish(value):
yield path, to_fileobj(value)
else:
raise DataException(
"Metadata '%s' for task '%s' has an invalid type: %s"
% (name, self._path, type(value))
)
self._storage_impl.save_bytes(blob_iter(), overwrite=allow_overwrite)
def _load_file(self, names, add_attempt=True):
"""
Loads files from the TaskDataStore directory. These can be metadata,
logs or any other files
Parameters
----------
names : List[string]
The names of the files to load
add_attempt : bool, optional
Adds the attempt identifier to the metadata name if True,
by default True
Returns
-------
Dict: string -> bytes
Results indexed by the name of the metadata loaded
"""
to_load = []
for name in names:
if add_attempt:
path = self._storage_impl.path_join(
self._path, self._metadata_name_for_attempt(name)
)
else:
path = self._storage_impl.path_join(self._path, name)
to_load.append(path)
results = {}
with self._storage_impl.load_bytes(to_load) as load_results:
for key, path, meta in load_results:
if add_attempt:
_, name = self.parse_attempt_metadata(
self._storage_impl.basename(key)
)
else:
name = self._storage_impl.basename(key)
if path is None:
results[name] = None
else:
with open(path, "rb") as f:
results[name] = f.read()
return results
================================================
FILE: metaflow/debug.py
================================================
from __future__ import print_function
import inspect
import sys
from functools import partial
from .util import is_stringish
# Set
#
# - METAFLOW_DEBUG_SUBCOMMAND=1
# to see command lines used to launch subcommands (especially 'step')
# - METAFLOW_DEBUG_SIDECAR=1
# to see command lines used to launch sidecars
# - METAFLOW_DEBUG_S3CLIENT=1
# to see command lines used by the S3 client. Note that this environment
# variable also disables automatic cleaning of subdirectories, which can
# fill up disk space quickly
class Debug(object):
    """
    Holder for per-category debug switches.

    For every entry ``typ`` of ``metaflow_config.DEBUG_OPTIONS`` the instance
    receives two dynamic attributes:

    - ``<typ>_exec``: callable taking the command line (a string or a
      sequence of strings) to report. It logs to stderr when
      ``metaflow_config.DEBUG_<TYP>`` is truthy and does nothing otherwise.
    - ``<typ>``: boolean telling whether logging is enabled for ``typ``.
    """

    def __init__(self):
        import metaflow.metaflow_config as config

        for typ in config.DEBUG_OPTIONS:
            if getattr(config, "DEBUG_%s" % typ.upper()):
                op = partial(self.log, typ)
            else:
                op = self.noop
            # use debug.$type_exec(args) to log command line for subprocesses
            # of type $type
            setattr(self, "%s_exec" % typ, op)
            # use the debug.$type flag to check if logging is enabled for $type
            setattr(self, typ, op != self.noop)

    def log(self, typ, args):
        """
        Print `args` to stderr, prefixed with the category `typ` and the file
        name and line number of the caller.

        `args` is printed as-is when stringish, otherwise its items are
        joined with single spaces.
        """
        if is_stringish(args):
            s = args
        else:
            s = " ".join(args)
        lineno = inspect.currentframe().f_back.f_lineno
        filename = inspect.stack()[1][1]
        print("debug[%s %s:%s]: %s" % (typ, filename, lineno, s), file=sys.stderr)

    def __getattr__(self, name):
        """
        Fallback lookup for attributes that were not set in ``__init__``.

        Defining this method lets pyright and other linters accept the
        dynamic attributes. Python only calls it after the normal lookup has
        failed, so the attribute does not exist.

        Raises
        ------
        AttributeError
            Always. Calling getattr(self, name) here instead would re-enter
            this method until a RecursionError is raised, which also breaks
            hasattr().
        """
        raise AttributeError(
            "'%s' object has no attribute '%s'" % (type(self).__name__, name)
        )

    def noop(self, args):
        """Stand-in for `log` used when a debug category is disabled."""
        pass
debug = Debug()
================================================
FILE: metaflow/decorators.py
================================================
import importlib
import json
import re
from functools import partial
from typing import Any, Callable, Dict, List, NewType, Tuple, TypeVar, Union, overload
from .flowspec import FlowSpec, FlowStateItems
from .exception import (
MetaflowInternalError,
MetaflowException,
InvalidDecoratorAttribute,
)
from .debug import debug
from .parameters import current_flow
from .user_configs.config_parameters import (
UNPACK_KEY,
resolve_delayed_evaluator,
unpack_delayed_evaluator,
)
from .user_decorators.mutable_flow import MutableFlow
from .user_decorators.mutable_step import MutableStep
from .user_decorators.user_flow_decorator import FlowMutator, FlowMutatorMeta
from .user_decorators.user_step_decorator import (
StepMutator,
UserStepDecoratorBase,
UserStepDecoratorMeta,
)
from .metaflow_config import SPIN_ALLOWED_DECORATORS
from metaflow._vendor import click
class BadStepDecoratorException(MetaflowException):
    """Raised when a step decorator is applied to something that is not a @step."""

    headline = "Syntax error"

    def __init__(self, deco, func):
        template = (
            "You tried to apply decorator '{deco}' on '{func}' which is "
            "not declared as a @step. Make sure you apply this decorator "
            "on a function which has @step on the line just before the "
            "function name and @{deco} is above @step."
        )
        # Fall back to str(func) for objects that have no __name__.
        func_label = getattr(func, "__name__", str(func))
        super().__init__(template.format(deco=deco, func=func_label))
class BadFlowDecoratorException(MetaflowException):
    """Raised when a flow decorator is applied to something that is not a FlowSpec."""

    headline = "Syntax error"

    def __init__(self, deconame):
        template = (
            "Decorator '%s' can be applied only to FlowSpecs. Make sure "
            "the decorator is above a class definition."
        )
        super().__init__(template % deconame)
class UnknownStepDecoratorException(MetaflowException):
    """Raised when a step decorator name cannot be resolved."""

    headline = "Unknown step decorator"

    def __init__(self, deconame):
        # Names ending in "_internal" are left out of the list shown to users.
        public_names = (
            known
            for known in UserStepDecoratorMeta.all_decorators().keys()
            if not known.endswith("_internal")
        )
        decos = ", ".join(public_names)
        template = (
            "Unknown step decorator *{deconame}*. The following decorators are "
            "supported: *{decos}*"
        )
        super().__init__(template.format(deconame=deconame, decos=decos))
class DuplicateStepDecoratorException(MetaflowException):
    """Raised when the same step decorator is attached twice to one step."""

    headline = "Duplicate decorators"

    def __init__(self, deco, func):
        template = (
            "Step '{step}' already has a decorator '{deco}'. "
            "You can specify this decorator only once."
        )
        super().__init__(template.format(step=func.__name__, deco=deco))
class UnknownFlowDecoratorException(MetaflowException):
    """Raised when a flow decorator name cannot be resolved."""

    headline = "Unknown flow decorator"

    def __init__(self, deconame):
        known_names = FlowMutatorMeta.all_decorators().keys()
        template = (
            "Unknown flow decorator *{deconame}*. The following decorators are "
            "supported: *{decos}*"
        )
        super().__init__(
            template.format(deconame=deconame, decos=", ".join(known_names))
        )
class DuplicateFlowDecoratorException(MetaflowException):
    """Raised when the same flow decorator is attached twice to one flow."""

    headline = "Duplicate decorators"

    def __init__(self, deco):
        template = (
            "Flow already has a decorator '{deco}'. "
            "You can specify each decorator only once."
        )
        super().__init__(template.format(deco=deco))
class Decorator(object):
    """
    Common base of all decorators.

    Class attributes
    ----------------
    name : identifier of the decorator.
    defaults : accepted attribute names mapped to their default values.
    allow_multiple : when True, several decorators of this type may be set
        on the same step/flow.
    """

    name = "NONAME"
    defaults = {}
    allow_multiple = False

    def __init__(self, attributes=None, statically_defined=False, inserted_by=None):
        self.attributes = self.defaults.copy()
        self.statically_defined = statically_defined
        self.inserted_by = inserted_by
        self._user_defined_attributes = set()
        self._ran_init = False

        for key, value in (attributes or {}).items():
            is_unpack = key.startswith(UNPACK_KEY)
            # Only names declared in `defaults` or unpack keys are accepted.
            if not (is_unpack or key in self.defaults):
                raise InvalidDecoratorAttribute(self.name, key, self.defaults)
            self.attributes[key] = value
            if not is_unpack:
                self._user_defined_attributes.add(key)

    def init(self):
        """
        Initializes the decorator. In general, anything you would do in
        __init__ should be done here instead.
        """
        return None

    def external_init(self):
        """
        Resolve delayed attribute values and run `init` once.

        Later calls return immediately: in some cases (specifically when
        using remove_decorator) this may be invoked several times.
        """
        if self._ran_init:
            return

        # Note that by design, later values override previous ones.
        unpacked, new_user_attributes = unpack_delayed_evaluator(self.attributes)
        self.attributes = unpacked
        self._user_defined_attributes.update(new_user_attributes)
        self.attributes = resolve_delayed_evaluator(self.attributes, to_dict=True)

        # init() is only called when the concrete class defines it itself.
        if "init" in self.__class__.__dict__:
            self.init()

        self._ran_init = True

    @classmethod
    def extract_args_kwargs_from_decorator_spec(cls, deco_spec):
        """
        Parse a "k1=v1,k2=v2" specification into ([], {k: parsed value}).

        Each value is parsed as JSON first (after turning \\" into "), then
        as an int, then as a float, and is otherwise kept as a stripped
        string.
        """
        if len(deco_spec) == 0:
            return [], {}

        attrs = {}
        # TODO: Do we really want to allow spaces in the names of attributes?!?
        for assignment in re.split(r""",(?=[\s\w]+=)""", deco_spec):
            name, val = assignment.split("=", 1)
            raw = val.strip()
            try:
                parsed = json.loads(raw.replace('\\"', '"'))
            except json.JSONDecodeError:
                # Not JSON: prefer an int, then a float, else keep the text.
                parsed = raw
                for caster in (int, float):
                    try:
                        parsed = caster(raw)
                        break
                    except ValueError:
                        continue
            attrs[name.strip()] = parsed
        return [], attrs

    @classmethod
    def parse_decorator_spec(cls, deco_spec):
        """Create an instance from a specification string (may be empty)."""
        if len(deco_spec) == 0:
            return cls()
        _, kwargs = cls.extract_args_kwargs_from_decorator_spec(deco_spec)
        return cls(attributes=kwargs)

    def make_decorator_spec(self):
        """
        Render this decorator as "name" or "name:k1=v1,k2=v2".

        Attributes whose value is None are left out.
        """
        # Make sure all attributes are evaluated
        self.external_init()

        rendered = []
        for key, value in self.attributes.items():
            if value is None:
                continue
            # Simple types are dumped directly as strings to get around the
            # nightmare of quote escaping; more complex types (typically
            # dictionaries or lists) are dumped as JSON with escaped quotes.
            if isinstance(value, (int, float, str)):
                text = str(value)
            else:
                text = json.dumps(value).replace('"', '\\"')
            rendered.append("%s=%s" % (key, text))

        if not rendered:
            return self.name
        return "%s:%s" % (self.name, ",".join(rendered))

    def get_args_kwargs(self) -> Tuple[List[Any], Dict[str, Any]]:
        """
        Get the arguments and keyword arguments of the decorator.

        Returns
        -------
        Tuple[List[Any], Dict[str, Any]]
            An empty list of arguments and a copy of the attributes as
            keyword arguments.
        """
        return [], dict(self.attributes)

    def __str__(self):
        mode = "dynamic"
        if self.statically_defined:
            mode = "static"
        if self.inserted_by:
            mode += " (inserted by %s)" % " from ".join(self.inserted_by)
        pieces = [mode]
        pieces.extend("%s=%s" % item for item in self.attributes.items())
        return "%s<%s>" % (self.name, " ".join(pieces))
class FlowDecorator(Decorator):
    """
    Base class for flow-level decorators.

    `options` maps option names to the keyword arguments used to create the
    corresponding command-line option.
    """

    options = {}

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def flow_init(
        self, flow, graph, environment, flow_datastore, metadata, logger, echo, options
    ):
        """
        Called when all decorators have been created for this flow.
        """
        return None

    def get_top_level_options(self):
        """
        Return a list of option-value pairs corresponding to the top-level
        options that should be passed to subprocesses (tasks). The option
        names should be a subset of the keys in self.options.

        If the decorator has a non-empty set of options in `self.options`,
        you probably want to return the assigned values in this method.
        """
        return []
# compare this to parameters.add_custom_parameters
def add_decorator_options(cmd):
    """
    Prepend to `cmd` the command-line options declared by the flow decorators
    of the current flow class.

    `cmd` is returned untouched when no current flow class is set. Each
    option gets the environment variable METAFLOW_FLOW_<OPTION> and is
    inserted at the front of `cmd.params`.

    Raises
    ------
    MetaflowInternalError
        If two decorators declare the same option, or if the reserved-name
        check below fails.
    """
    flow_cls = getattr(current_flow, "flow_cls", None)
    if flow_cls is None:
        return cmd

    owner_by_option = {}
    reserved_names = {param.name.lower() for param in cmd.params}

    # Add decorator options
    for deco in flow_decorators(flow_cls):
        for option, kwargs in deco.options.items():
            if option in owner_by_option:
                raise MetaflowInternalError(
                    "Flow decorator '%s' uses an option '%s' which is also "
                    "used by the decorator '%s'. This is a bug in Metaflow. "
                    "Please file a ticket on GitHub."
                    % (deco.name, option, owner_by_option[option])
                )
            # NOTE(review): the test below looks at the decorator name while
            # the message talks about the option name -- confirm which one is
            # intended before changing it.
            if deco.name.lower() in reserved_names:
                raise MetaflowInternalError(
                    "Flow decorator '%s' uses an option '%s' which is a reserved "
                    "keyword. Please use a different option name." % (deco.name, option)
                )
            kwargs["envvar"] = "METAFLOW_FLOW_%s" % option.upper()
            owner_by_option[option] = deco.name
            cmd.params.insert(0, click.Option(("--" + option,), **kwargs))
    return cmd
def flow_decorators(flow_cls):
    """
    Return, as one flat list, every flow decorator instance recorded in the
    flow state of `flow_cls`.
    """
    collected = []
    for deco_list in flow_cls._flow_state[FlowStateItems.FLOW_DECORATORS].values():
        collected.extend(deco_list)
    return collected
class StepDecorator(Decorator):
    """
    Base class for all step decorators.

    Example:

    @my_decorator
    @step
    def a(self):
        pass

    @my_decorator
    @step
    def b(self):
        pass

    For the above to work, define a subclass

    class MyDecorator(StepDecorator):
        name = "my_decorator"

    and include it in plugins.STEP_DECORATORS. Both a() and b() then get
    their own instance of MyDecorator, which makes it easy to keep
    step-specific state.

    TODO (savin): Initialize the decorators with flow, graph,
                  step.__name__ etc., so that we don't have to
                  pass them around with every lifecycle call.
    """

    def step_init(
        self, flow, graph, step_name, decorators, environment, flow_datastore, logger
    ):
        """
        Called when all decorators have been created for this step
        """
        return None

    def package_init(self, flow, step_name, environment):
        """
        Called to determine package components
        """
        return None

    def add_to_package(self):
        """
        Called to add custom files needed for this environment. This hook is
        called in the `MetaflowPackage` class where metaflow compiles the
        code package tarball. It can return one of two things (the first one
        exists for backwards compatibility -- move to the second):

          - a generator yielding tuples of `(file_path, arcname)` to add
            files to the code package. `file_path` is the path to the file on
            the local filesystem and `arcname` is the path relative to the
            packaged code.

          - a generator yielding tuples of `(content, arcname, type)` where
            type is one of
            ContentType.{USER_CONTENT, CODE_CONTENT, MODULE_CONTENT, OTHER_CONTENT}

            - for USER_CONTENT: the file is included relative to the
              directory containing the user's flow file.
                - content: path to the file to include
                - arcname: path relative to the directory containing the
                  user's flow file
            - for CODE_CONTENT: the file is included relative to the code
              directory in the package, i.e. the directory containing
              `metaflow`.
                - content: path to the file to include
                - arcname: path relative to the code directory in the package
            - for MODULE_CONTENT: the module is added to the code package as
              a python module and is importable as usual.
                - content: name of the module
                - arcname: None (ignored)
            - for OTHER_CONTENT: the file is included relative to any other
              configuration/metadata files for the flow.
                - content: path to the file to include
                - arcname: path relative to the config directory in the
                  package
        """
        return []

    def step_task_retry_count(self):
        """
        Called to determine the number of times this task should be retried.
        Returns a tuple of (user_code_retries, error_retries). Error retries
        are attempts to run the process after the user code has failed all
        its retries.

        Typically, the runtime takes the maximum of retry counts across
        decorators and user specification to determine the task retry count.
        If you want to force no retries, return the special values
        (None, None).
        """
        return 0, 0

    def runtime_init(self, flow, graph, package, run_id):
        """
        Top-level initialization before anything gets run in the runtime
        context.
        """
        return None

    def runtime_task_created(
        self, task_datastore, task_id, split_index, input_paths, is_cloned, ubf_context
    ):
        """
        Called when the runtime has created a task related to this step.
        """
        return None

    def runtime_finished(self, exception):
        """
        Called when the runtime created task finishes or encounters an
        interrupt/exception.
        """
        return None

    def runtime_step_cli(
        self, cli_args, retry_count, max_user_code_retries, ubf_context
    ):
        """
        Access the command line for a step execution in the runtime context.
        """
        return None

    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_user_code_retries,
        ubf_context,
        inputs,
    ):
        """
        Run before the step function in the task context.
        """
        return None

    def task_decorate(
        self, step_func, flow, graph, retry_count, max_user_code_retries, ubf_context
    ):
        """
        Hook receiving the step function; the base implementation returns
        `step_func` unchanged.
        """
        return step_func

    def task_post_step(
        self, step_name, flow, graph, retry_count, max_user_code_retries
    ):
        """
        Run after the step function has finished successfully in the task
        context.
        """
        return None

    def task_exception(
        self, exception, step_name, flow, graph, retry_count, max_user_code_retries
    ):
        """
        Run if the step function raised an exception in the task context.

        If this method returns True, it is assumed that the exception has
        been taken care of and the flow may continue.
        """
        return None

    def task_finished(
        self, step_name, flow, graph, is_task_ok, retry_count, max_user_code_retries
    ):
        """
        Run after the task context has been finalized.

        is_task_ok is set to False if the user code raised an exception that
        was not handled by any decorator.

        Note that you can't create or modify data artifacts in this method
        since the task has been finalized by the time this method
        is called. Also note that the task may fail after this method has
        been called, so this method may get called multiple times for a task
        over multiple attempts, similar to all task_ methods.
        """
        return None
def _base_flow_decorator(decofunc, *args, **kwargs):
"""
Decorator prototype for all flow (class) decorators. This function gets
specialized and imported for all decorators types by
_import_plugin_decorators().
"""
if args:
# No keyword arguments specified for the decorator, e.g. @foobar.
# The first argument is the class to be decorated.
cls = args[0]
"""
When stacking decorators, cls may be another FlowMutator, for example
@flow_decorator
@flow_mutator
class MyFlow(FlowSpec):
...
"""
if isinstance(cls, (FlowMutator,)):
cls = cls._flow_cls
if isinstance(cls, type) and issubclass(cls, FlowSpec):
# flow decorators add attributes in the class dictionary,
# cls._flow_state[FlowStateItems.FLOW_DECORATORS]. This is of type `{key:[decos]}`
self_flow_decos = cls._flow_state.self_data[FlowStateItems.FLOW_DECORATORS]
inherited_flow_decos = cls._flow_state.inherited_data.get(
FlowStateItems.FLOW_DECORATORS, {}
)
if (
decofunc.name in self_flow_decos
or decofunc.name in inherited_flow_decos
) and not decofunc.allow_multiple:
raise DuplicateFlowDecoratorException(decofunc.name)
else:
deco_instance = decofunc(attributes=kwargs, statically_defined=True)
self_flow_decos.setdefault(decofunc.name, []).append(deco_instance)
else:
raise BadFlowDecoratorException(decofunc.name)
return cls
else:
# Keyword arguments specified, e.g. @foobar(a=1, b=2).
# Return a decorator function that will get the actual
# function to be decorated as the first argument.
def wrap(f):
return _base_flow_decorator(decofunc, f, **kwargs)
return wrap
def _base_step_decorator(decotype, *args, **kwargs):
"""
Decorator prototype for all step decorators. This function gets specialized
and imported for all decorators types by _import_plugin_decorators().
"""
if args:
# No keyword arguments specified for the decorator, e.g. @foobar.
# The first argument is the function to be decorated.
func = args[0]
if isinstance(func, (StepMutator, UserStepDecoratorBase)):
func = func._my_step
if not hasattr(func, "is_step"):
raise BadStepDecoratorException(decotype.name, func)
# if `allow_multiple` is not `True` then only one decorator type is allowed per step
if (
decotype.name in [deco.name for deco in func.decorators]
and not decotype.allow_multiple
):
raise DuplicateStepDecoratorException(decotype.name, func)
else:
func.decorators.append(decotype(attributes=kwargs, statically_defined=True))
return func
else:
# Keyword arguments specified, e.g. @foobar(a=1, b=2).
# Return a decorator function that will get the actual
# function to be decorated as the first argument.
def wrap(f):
return _base_step_decorator(decotype, f, **kwargs)
return wrap
# Lazily-filled caches of decorator name -> decorator class.
_all_step_decos = None
_all_flow_decos = None


def get_all_step_decos():
    """
    Return the mapping of step decorator name -> class built from
    plugins.STEP_DECORATORS; it is built on the first call and reused after.
    """
    global _all_step_decos
    if _all_step_decos is not None:
        return _all_step_decos

    # Imported here rather than at module import time.
    from .plugins import STEP_DECORATORS

    _all_step_decos = dict((decotype.name, decotype) for decotype in STEP_DECORATORS)
    return _all_step_decos
def get_all_flow_decos():
    """
    Return the mapping of flow decorator name -> decorator class.

    The mapping is built from `plugins.FLOW_DECORATORS` on the first call and
    cached in the module-level `_all_flow_decos` for later calls.
    """
    global _all_flow_decos
    if _all_flow_decos is not None:
        return _all_flow_decos

    # Imported at first use rather than at module import time.
    from .plugins import FLOW_DECORATORS

    registry = {}
    for decotype in FLOW_DECORATORS:
        registry[decotype.name] = decotype
    _all_flow_decos = registry
    return _all_flow_decos
def extract_step_decorator_from_decospec(decospec: str):
    """
    Resolve a decorator spec string into a step decorator.

    The spec has the form `name` or `name:attributes`. `name` is first looked
    up with `UserStepDecoratorMeta.get_decorator_by_name`. If that finds
    nothing and `name` contains a dot, it is treated as an import path of the
    form `package.module.ClassName`.

    Parameters
    ----------
    decospec : str
        Decorator specification.

    Returns
    -------
    tuple
        `(decorator, has_attributes)`: the result of the decorator class'
        `parse_decorator_spec` and a bool that is True when the spec
        contained a `:` separator.

    Raises
    ------
    MetaflowException
        If the module part of a dotted name cannot be imported. This includes
        malformed paths such as `.foo` (empty or relative module name).
    UnknownStepDecoratorException
        If the name does not resolve to a `UserStepDecoratorBase` subclass.
    """
    deconame, separator, attr_spec = decospec.partition(":")
    has_attributes = bool(separator)

    # Check if it is a user-defined decorator or metaflow decorator
    deco_cls = UserStepDecoratorMeta.get_decorator_by_name(deconame)

    if deco_cls is None and "." in deconame:
        # We consider this to be an import path to a user decorator, so
        # something like "my_package.my_decorator".
        module_name, class_name = deconame.rsplit(".", 1)
        try:
            module = importlib.import_module(module_name)
        except (ImportError, TypeError, ValueError) as e:
            # import_module raises ValueError for an empty module name and
            # TypeError for a relative name without a package; report those
            # the same way as a module that cannot be found.
            raise MetaflowException(
                "Could not import user decorator %s" % deconame
            ) from e
        deco_cls = getattr(module, class_name, None)
        if not (
            isinstance(deco_cls, type) and issubclass(deco_cls, UserStepDecoratorBase)
        ):
            raise UnknownStepDecoratorException(deconame)

    if deco_cls is None:
        raise UnknownStepDecoratorException(deconame)

    return deco_cls.parse_decorator_spec(attr_spec), has_attributes
def extract_flow_decorator_from_decospec(decospec: str):
    """
    Resolve a decorator spec string (`name` or `name:attributes`) into a
    flow decorator.

    Returns
    -------
    tuple
        `(decorator, has_attributes)`: the result of the decorator class'
        `parse_decorator_spec` and a bool that is True when the spec
        contained a `:` separator.

    Raises
    ------
    UnknownFlowDecoratorException
        If `FlowMutatorMeta.get_decorator_by_name` does not know the name.
    """
    deconame, separator, attr_spec = decospec.partition(":")

    # Check if it is a user-defined decorator or metaflow decorator
    deco_cls = FlowMutatorMeta.get_decorator_by_name(deconame)
    if deco_cls is None:
        raise UnknownFlowDecoratorException(deconame)

    return deco_cls.parse_decorator_spec(attr_spec), bool(separator)
def _attach_decorators(flow, decospecs):
"""
Attach decorators to all steps during runtime. This has the same
effect as if you defined the decorators statically in the source for
every step. Used by --with command line parameter.
"""
# Attach the decorator to all steps that don't have this decorator
# already. This means that statically defined decorators are always
# preferred over runtime decorators.
#
# Note that each step gets its own instance of the decorator class,
# so decorator can maintain step-specific state.
for step in flow:
_attach_decorators_to_step(step, decospecs)
def _attach_decorators_to_step(step, decospecs):
"""
Attach decorators to a step during runtime. This has the same
effect as if you defined the decorators statically in the source for
the step.
"""
for decospec in decospecs:
step_deco, _ = extract_step_decorator_from_decospec(decospec)
if isinstance(step_deco, StepDecorator):
# Check multiple
if (
step_deco.name not in [deco.name for deco in step.decorators]
or step_deco.allow_multiple
):
step.decorators.append(step_deco)
# Else it is ignored -- this is a non-static decorator
else:
step_deco.add_or_raise(step, False, 1, None)
def _should_skip_decorator_for_spin(
deco, is_spin, skip_decorators, logger, decorator_type="decorator"
):
"""
Determine if a decorator should be skipped for spin steps.
Parameters:
-----------
deco : Decorator
The decorator instance to check
is_spin : bool
Whether this is a spin step
skip_decorators : bool
Whether to skip all decorators
logger : callable
Logger function for warnings
decorator_type : str
Type of decorator ("Flow decorator" or "Step decorator") for logging
Returns:
--------
bool
True if the decorator should be skipped, False otherwise
"""
if not is_spin:
return False
# Skip all decorator hooks if skip_decorators is True
if skip_decorators:
return True
# Run decorator hooks for spin steps only if they are in the whitelist
if deco.name not in SPIN_ALLOWED_DECORATORS:
logger(
f"[Warning] Ignoring {decorator_type} '{deco.name}' as it is not supported in spin steps.",
system_msg=True,
timestamp=False,
bad=True,
)
return True
return False
def _init(flow, only_non_static=False):
    """
    Call `external_init()` on every decorator attached to `flow`.

    Flow-level decorators are handled first. Then, for each step in order,
    its decorators, its config decorators and its wrappers are handled.
    `only_non_static` is accepted but not used by this function.
    """
    flow_level_groups = flow._flow_state[FlowStateItems.FLOW_DECORATORS].values()
    for deco_group in flow_level_groups:
        for flow_deco in deco_group:
            flow_deco.external_init()

    for flowstep in flow:
        for step_deco in flowstep.decorators:
            step_deco.external_init()
        # config_decorators / wrappers may be None or empty
        for config_deco in flowstep.config_decorators or []:
            config_deco.external_init()
        for wrapper in flowstep.wrappers or []:
            wrapper.external_init()
def _init_flow_decorators(
    flow,
    graph,
    environment,
    flow_datastore,
    metadata,
    logger,
    echo,
    deco_options,
    is_spin=False,
    skip_decorators=False,
):
    """
    Call `flow_init` on every flow-level decorator of `flow`.

    Parameters
    ----------
    flow, graph, environment, flow_datastore, metadata, logger, echo
        Forwarded unchanged to each decorator's `flow_init`.
    deco_options : dict
        Option values (passed from the cli) keyed by option name with `-`
        replaced by `_`. Options missing from it fall back to the default
        declared in the decorator's `options`.
    is_spin, skip_decorators : bool
        Forwarded to `_should_skip_decorator_for_spin` to decide whether a
        decorator is skipped.

    Raises
    ------
    MetaflowException
        If a decorator type with `allow_multiple=True` declares options.
    """
    # Since all flow decorators are stored as `{key:[deco]}` we iterate through each of them.
    flow_decos = flow._flow_state[FlowStateItems.FLOW_DECORATORS]
    for decorators in flow_decos.values():
        if not decorators:
            # Nothing registered under this name; avoid indexing an empty list.
            continue

        # First resolve the `options` for the flow decorator.
        # Options are passed from cli.
        # For example `@project` can take a `--name` / `--branch` from the cli as options.
        deco_flow_init_options = {}
        deco = decorators[0]
        # If a flow decorator allow multiple of same type then we don't allow multiple options for it.
        if deco.allow_multiple:
            if len(deco.options) > 0:
                raise MetaflowException(
                    "Flow decorator `@%s` has multiple options, which is not allowed. "
                    "Please ensure the FlowDecorator `%s` has no options since flow decorators with "
                    "`allow_multiple=True` are not allowed to have options"
                    % (deco.name, deco.__class__.__name__)
                )
        else:
            # Each "non-multiple" flow decorator is only allowed to have one set of options
            # Note that there may be no deco_options if a MutableFlow config injected
            # the decorator.
            deco_flow_init_options = {
                option: deco_options.get(
                    option.replace("-", "_"), option_info["default"]
                )
                for option, option_info in deco.options.items()
            }

        for deco in decorators:
            if _should_skip_decorator_for_spin(
                deco, is_spin, skip_decorators, logger, "Flow decorator"
            ):
                continue
            deco.flow_init(
                flow,
                graph,
                environment,
                flow_datastore,
                metadata,
                logger,
                echo,
                deco_flow_init_options,
            )
# Runs the `mutate` hook of every flow mutator and step mutator, rebuilds the
# graph, and then calls `step_init` on each step decorator that is not skipped
# by `_should_skip_decorator_for_spin`.
def _init_step_decorators(
flow,
graph,
environment,
flow_datastore,
logger,
is_spin=False,
skip_decorators=False,
):
# NOTE: We don't need the graph but keeping it for backwards compatibility with
# extensions that use it directly. We will remove it at some point.
# We call the mutate method for both the flow and step mutators.
cls = flow.__class__
# Run all the decorators. We first run the flow-level decorators
# and then the step level ones to maintain a consistent order with how
# other decorators are run.
for deco in cls._flow_state[FlowStateItems.FLOW_MUTATORS]:
if isinstance(deco, FlowMutator):
# Provenance chain handed to MutableFlow: this mutator's name followed by
# whatever inserted it (if anything).
inserted_by_value = [deco.decorator_name] + (deco.inserted_by or [])
mutable_flow = MutableFlow(
cls,
pre_mutate=False,
statically_defined=deco.statically_defined,
inserted_by=inserted_by_value,
)
# Sanity check to make sure we are applying the decorator to the right
# class
if not deco._flow_cls == cls and not issubclass(cls, deco._flow_cls):
raise MetaflowInternalError(
"FlowMutator registered on the wrong flow -- "
"expected %s but got %s" % (deco._flow_cls.__name__, cls.__name__)
)
debug.userconf_exec(
"Evaluating flow level decorator %s (mutate)" % deco.__class__.__name__
)
deco.mutate(mutable_flow)
# We reset cached_parameters on the very off chance that the user added
# more configurations based on the configuration
cls._flow_state[FlowStateItems.CACHED_PARAMETERS] = None
else:
raise MetaflowInternalError(
"A non FlowMutator found in flow custom decorators"
)
for step in cls._steps:
for deco in step.config_decorators:
# Computed before the type check below, so it reads `decorator_name` and
# `inserted_by` on whatever object is in the list.
inserted_by_value = [deco.decorator_name] + (deco.inserted_by or [])
if isinstance(deco, StepMutator):
debug.userconf_exec(
"Evaluating step level decorator %s for %s (mutate)"
% (deco.__class__.__name__, step.name)
)
deco.mutate(
MutableStep(
cls,
step,
pre_mutate=False,
statically_defined=deco.statically_defined,
inserted_by=inserted_by_value,
)
)
else:
raise MetaflowInternalError(
"A non StepMutator found in step custom decorators"
)
if step.config_decorators:
# We remove all mention of the custom step decorator
setattr(cls, step.name, step)
# The graph is rebuilt after the mutators ran; the `graph` argument is
# replaced by the rebuilt graph from here on.
cls._init_graph()
graph = flow._graph
for step in flow:
for deco in step.decorators:
if _should_skip_decorator_for_spin(
deco, is_spin, skip_decorators, logger, "Step decorator"
):
continue
deco.step_init(
flow,
graph,
step.__name__,
step.decorators,
environment,
flow_datastore,
logger,
)
def _process_late_attached_decorator(
deco_names,
flow,
graph,
environment,
flow_datastore,
logger,
is_spin=False,
skip_decorators=False,
):
for s in flow:
for deco in s.decorators:
if deco.name in deco_names:
deco.external_init()
for s in flow:
for deco in s.decorators:
if deco.name in deco_names:
if _should_skip_decorator_for_spin(
deco, is_spin, skip_decorators, logger, "Step decorator"
):
continue
deco.step_init(
flow,
graph,
s.__name__,
s.decorators,
environment,
flow_datastore,
logger,
)
# Type variable for "some subclass of FlowSpec"; used to type the `self`
# argument of step functions in the `step` signatures.
FlowSpecDerived = TypeVar("FlowSpecDerived", bound=FlowSpec)
# The StepFlag is a "fake" input item to be able to distinguish
# callables and those that have had a `@step` decorator on them. This enables us
# to check the ordering of decorators (ie: put @step first) with the type
# system. There should be a better way to do this with a more flexible type
# system but this is what works for now with the Python type system
StepFlag = NewType("StepFlag", bool)
@overload
def step(
    f: Callable[[FlowSpecDerived], None],
) -> Callable[[FlowSpecDerived, StepFlag], None]: ...


@overload
def step(
    f: Callable[[FlowSpecDerived, Any], None],
) -> Callable[[FlowSpecDerived, Any, StepFlag], None]: ...


def step(
    f: Union[Callable[[FlowSpecDerived], None], Callable[[FlowSpecDerived, Any], None]],
):
    """
    Turn a FlowSpec method into a Metaflow Step.

    `@step` has to be the decorator closest to the method, i.e. every other
    decorator goes above it. This is valid:

    ```
    @batch
    @step
    def foo(self):
        pass
    ```

    and this is not:

    ```
    @step
    @batch
    def foo(self):
        pass
    ```

    Parameters
    ----------
    f : Union[Callable[[FlowSpecDerived], None], Callable[[FlowSpecDerived, Any], None]]
        Function to make into a Metaflow Step

    Returns
    -------
    Union[Callable[[FlowSpecDerived, StepFlag], None], Callable[[FlowSpecDerived, Any, StepFlag], None]]
        The same function object, tagged as a Metaflow Step
    """
    f.is_step = True
    # Every step starts with its own empty containers for decorators,
    # config decorators and wrappers.
    for container_attr in ("decorators", "config_decorators", "wrappers"):
        setattr(f, container_attr, [])
    f.name = f.__name__
    return f
def _import_plugin_decorators(globals_dict):
    """
    Populate `globals_dict` with one decorator function per entry of
    plugins.STEP_DECORATORS and plugins.FLOW_DECORATORS, keyed by the
    decorator's `name`.
    """
    from .plugins import STEP_DECORATORS, FLOW_DECORATORS

    # Q: Why not use StepDecorators directly as decorators?
    # A: Getting an object behave as a decorator that can work
    #    both with and without arguments is surprisingly hard.
    #    It is easier to make plain function decorators work in
    #    the dual mode - see _base_step_decorator above.
    generators = (
        (STEP_DECORATORS, _base_step_decorator),
        # flow-level decorators are registered after the step-level ones
        (FLOW_DECORATORS, _base_flow_decorator),
    )
    for decotypes, base_decorator in generators:
        for decotype in decotypes:
            globals_dict[decotype.name] = partial(base_decorator, decotype)
================================================
FILE: metaflow/event_logger.py
================================================
from metaflow.sidecar import Message, MessageTypes, Sidecar
class NullEventLogger(object):
    """
    Event logger that forwards everything to a `Sidecar` created with the
    type name stored in `TYPE`.
    """

    TYPE = "nullSidecarLogger"

    def __init__(self, *args, **kwargs):
        # Currently passed flow and env in kwargs; neither is used here.
        self._sidecar = Sidecar(self.TYPE)

    def start(self):
        """Start the sidecar and return whatever its `start()` returns."""
        return self._sidecar.start()

    def terminate(self):
        """Terminate the sidecar and return whatever its `terminate()` returns."""
        return self._sidecar.terminate()

    def send(self, msg):
        """
        Send an arbitrary, already-built message to the sidecar. Useful if you
        want to override some different types of messages.
        """
        self._sidecar.send(msg)

    def log(self, payload):
        """Send `payload` as a BEST_EFFORT message; no-op if the sidecar is inactive."""
        if not self._sidecar.is_active:
            return
        self._sidecar.send(Message(MessageTypes.BEST_EFFORT, payload))

    @classmethod
    def get_worker(cls):
        """This logger has no worker; always returns None."""
        return None
================================================
FILE: metaflow/events.py
================================================
from collections import OrderedDict, namedtuple
from datetime import datetime
from typing import List, Optional, TYPE_CHECKING, Union
if TYPE_CHECKING:
import metaflow
MetaflowEvent = namedtuple("MetaflowEvent", ["name", "id", "timestamp", "type"])
MetaflowEvent.__doc__ = """
    Container of metadata that identifies the event that triggered
    the `Run` under consideration.

    Attributes
    ----------
    name : str
        name of the event.
    id : str
        unique identifier for the event.
    timestamp : datetime
        timestamp recording creation time for the event.
    type : str
        type for the event - one of `event` or `run`
    """


class Trigger(object):
    """
    Defines a container of event triggers' metadata.
    """

    def __init__(self, _meta=None):
        """
        Parameters
        ----------
        _meta : List[Dict], optional
            One mapping per triggering event carrying the fields of
            `MetaflowEvent` (`name`, `id`, `timestamp`, `type`). The list
            passed in is left untouched.
        """
        # Sort a copy (latest first) instead of sorting in place, so the
        # caller's list is never reordered as a side effect. Entries without
        # a usable timestamp sort last.
        ordered_meta = sorted(
            [] if _meta is None else _meta,
            key=lambda x: x.get("timestamp") or float("-inf"),
            reverse=True,
        )
        self._runs = None
        self._events = []
        for obj in ordered_meta:
            fields = dict(obj)
            timestamp = obj.get("timestamp")
            # Add timestamp as datetime. Guaranteed to exist for Metaflow
            # events - best effort for everything else.
            if timestamp and isinstance(timestamp, int):
                fields["timestamp"] = datetime.fromtimestamp(timestamp)
            self._events.append(MetaflowEvent(**fields))

    @classmethod
    def from_runs(cls, run_objs: List["metaflow.Run"]):
        """
        Build a `Trigger` from the runs that triggered the current run.

        Parameters
        ----------
        run_objs : List[Run]
            Triggering runs, in any order. The list passed in is left
            untouched.

        Returns
        -------
        Trigger
            Trigger whose events and runs are ordered latest-finished first.
        """
        # Work on a sorted copy: neither reorder nor alias the caller's list.
        ordered_runs = sorted(run_objs, key=lambda x: x.finished_at, reverse=True)
        trigger = Trigger(
            [
                {
                    "type": "run",
                    "timestamp": run_obj.finished_at,
                    "name": "metaflow.%s.%s" % (run_obj.parent.id, run_obj["end"].id),
                    "id": run_obj.end_task.pathspec,
                }
                for run_obj in ordered_runs
            ]
        )
        trigger._runs = ordered_runs
        return trigger

    @property
    def event(self) -> Optional[MetaflowEvent]:
        """
        The `MetaflowEvent` object corresponding to the triggering event.
        If multiple events triggered the run, this property is the latest event.

        Returns
        -------
        MetaflowEvent, optional
            The latest event that triggered the run, if applicable.
        """
        return next(iter(self._events), None)

    @property
    def events(self) -> Optional[List[MetaflowEvent]]:
        """
        The list of `MetaflowEvent` objects corresponding to all the triggering events.

        Returns
        -------
        List[MetaflowEvent], optional
            List of all events that triggered the run
        """
        return list(self._events) or None

    @property
    def run(self) -> Optional["metaflow.Run"]:
        """
        The corresponding `Run` object if the triggering event is a Metaflow run.
        In case multiple runs triggered the run, this property is the latest run.
        Returns `None` if none of the triggering events are a `Run`.

        Returns
        -------
        Run, optional
            Latest Run that triggered this run, if applicable.
        """
        if self._runs is None:
            # Accessing the property populates self._runs as a side effect.
            self.runs
        return next(iter(self._runs), None)

    @property
    def runs(self) -> Optional[List["metaflow.Run"]]:
        """
        The list of `Run` objects in the triggering events.
        Returns `None` if none of the triggering events are `Run` objects.

        Returns
        -------
        List[Run], optional
            List of runs that triggered this run, if applicable.
        """
        if self._runs is None:
            # to avoid circular import
            from metaflow import Run

            self._runs = [
                Run(
                    # object id is the task pathspec for events that map to run;
                    # keep everything before the second "/".
                    obj.id[: obj.id.index("/", obj.id.index("/") + 1)],
                    _namespace_check=False,
                )
                for obj in self._events
                if obj.type == "run"
            ]

        return list(self._runs) or None

    def __getitem__(self, key: str) -> Union["metaflow.Run", MetaflowEvent]:
        """
        If triggering events are runs, `key` corresponds to the flow name of the triggering run.
        Otherwise, `key` corresponds to the event name and a `MetaflowEvent` object is returned.

        Returns
        -------
        Union[Run, MetaflowEvent]
            `Run` object if triggered by a run. Otherwise returns a `MetaflowEvent`.

        Raises
        ------
        KeyError
            If no triggering run or event matches `key`.
        """
        runs = self.runs
        if runs:
            for run in runs:
                if run.path_components[0] == key:
                    return run
        else:
            for event in self.events or []:
                if event.name == key:
                    return event
        raise KeyError(key)

    def __iter__(self):
        """Iterate over the triggering events, latest first."""
        return iter(self.events or [])

    def __contains__(self, ident: str) -> bool:
        """True if `self[ident]` resolves to a truthy object, False on KeyError."""
        try:
            return bool(self.__getitem__(ident))
        except KeyError:
            return False
================================================
FILE: metaflow/exception.py
================================================
import sys
import traceback
# Exit codes used by worker processes to signal whether a retry is wanted.
# worker processes that exit with this exit code are not retried
METAFLOW_EXIT_DISALLOW_RETRY = 202
# worker processes that exit with this code should be retried (if retry counts left)
METAFLOW_EXIT_ALLOW_RETRY = 203
class MetaflowExceptionWrapper(Exception):
    """
    Picklable snapshot of another exception.

    Stores the wrapped exception's message (`exception`), its fully qualified
    class name (`type`) and, when constructed while an exception is being
    handled, the formatted traceback (`stacktrace`).
    """

    def __init__(self, exc=None):
        if exc is None:
            # Used when unpickling: attributes are restored by __setstate__.
            return
        exc_class = exc.__class__
        self.exception = str(exc)
        self.type = "%s.%s" % (exc_class.__module__, exc_class.__name__)
        # A traceback is only available while an exception is being handled.
        handling_exception = sys.exc_info()[0] is not None
        self.stacktrace = traceback.format_exc() if handling_exception else None

    # Base Exception defines its own __reduce__ and __setstate__
    # which don't work nicely with derived exceptions. We override
    # the magic methods related to pickle to get desired behavior.
    def __reduce__(self):
        return MetaflowExceptionWrapper, (None,), self.__dict__

    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__ = state

    def __repr__(self):
        return str(self)

    def __str__(self):
        if not self.stacktrace:
            return "[no stacktrace]\n%s: %s" % (self.type, self.exception)
        return self.stacktrace
class MetaflowException(Exception):
    """
    Base class of Metaflow's exceptions.

    Parameters
    ----------
    msg : str
        Human readable message, stored as `message`.
    lineno : int, optional
        Line number the error refers to, stored as `line_no`.
    source_file : str, optional
        File the error refers to, stored as `source_file`.
    """

    headline = "Flow failed"

    def __init__(self, msg="", lineno=None, source_file=None):
        self.message = msg
        self.line_no = lineno
        self.source_file = source_file
        super(MetaflowException, self).__init__()

    def __str__(self):
        """
        Return the message, prefixed by the known location parts:
        `<source_file>: line <n>: <message>`. Parts that are not set are
        left out.
        """
        location = []
        if self.source_file:
            location.append("%s" % self.source_file)
        if self.line_no:
            location.append("line %d" % self.line_no)
        # Join the parts with a single ": " so that neither part is dropped
        # and no doubled colon is emitted.
        prefix = "%s: " % ": ".join(location) if location else ""
        return "%s%s" % (prefix, self.message)
class ParameterFieldFailed(MetaflowException):
    """
    Error for a Parameter field whose evaluation raised. The message embeds
    the traceback of the exception being handled at construction time.
    """

    headline = "Parameter field failed"

    def __init__(self, name, field):
        active_traceback = traceback.format_exc()
        template = (
            "When evaluating the field *%s* for the Parameter *%s*, "
            "the following exception occurred:\n\n%s"
        )
        super().__init__(template % (field, name, active_traceback))
class ParameterFieldTypeMismatch(MetaflowException):
    """MetaflowException carrying `msg`, reported under a type-mismatch headline."""

    headline = "Parameter field with a mismatching type"

    def __init__(self, msg):
        super().__init__(msg)
class ExternalCommandFailed(MetaflowException):
    """MetaflowException carrying `msg`, reported under the headline "External command failed"."""

    headline = "External command failed"

    def __init__(self, msg):
        super().__init__(msg)
class MetaflowNotFound(MetaflowException):
    """MetaflowException reported under the headline "Object not found"."""

    headline = "Object not found"
class MetaflowNamespaceMismatch(MetaflowException):
    """Error whose message names the namespace an object is not part of."""

    headline = "Object not in the current namespace"

    def __init__(self, namespace):
        super().__init__("Object not in namespace '%s'" % namespace)
class MetaflowInvalidPathspec(MetaflowException):
    """MetaflowException carrying `msg`, reported under the headline "Invalid pathspec"."""

    headline = "Invalid pathspec"

    def __init__(self, msg):
        super().__init__(msg)
class MetaflowInternalError(MetaflowException):
    """MetaflowException reported under the headline "Internal error"."""

    headline = "Internal error"
class MetaflowTaggingError(MetaflowException):
    """MetaflowException reported under the headline "Tagging error"."""

    headline = "Tagging error"
class MetaflowUnknownUser(MetaflowException):
    """Error with a fixed message stating that the user name could not be determined."""

    headline = "Unknown user"

    def __init__(self):
        super().__init__(
            "Metaflow could not determine your user name based on "
            "environment variables ($USERNAME etc.)"
        )
class InvalidDecoratorAttribute(MetaflowException):
    """
    Error for an attribute a decorator does not support. The message names
    the decorator, the offending attribute and the supported attributes.
    """

    headline = "Unknown decorator attribute"

    def __init__(self, deconame, attr, defaults):
        supported = ", ".join(defaults)
        template = (
            "Decorator '{deco}' does not support the attribute '{attr}'. "
            "These attributes are supported: {defaults}."
        )
        super().__init__(template.format(deco=deconame, attr=attr, defaults=supported))
class CommandException(MetaflowException):
    """MetaflowException reported under the headline "Invalid command"."""

    headline = "Invalid command"
class MetaflowDataMissing(MetaflowException):
    """MetaflowException reported under the headline "Data missing"."""

    headline = "Data missing"
class UnhandledInMergeArtifactsException(MetaflowException):
    """
    MetaflowException carrying `msg`; the value passed as `unhandled` is kept
    in `artifact_names`.
    """

    headline = "Unhandled artifacts in merge"

    def __init__(self, msg, unhandled):
        super().__init__(msg)
        self.artifact_names = unhandled
class MissingInMergeArtifactsException(MetaflowException):
    """
    MetaflowException carrying `msg`; the value passed as `unhandled` is kept
    in `artifact_names`.
    """

    headline = "Missing artifacts in merge"

    def __init__(self, msg, unhandled):
        super().__init__(msg)
        self.artifact_names = unhandled
# Import any exceptions defined by a Metaflow extensions packages
# The globals of every module registered for the "exceptions" extension point
# are copied into this module's namespace.
try:
from metaflow.extension_support import get_modules, multiload_globals
multiload_globals(get_modules("exceptions"), globals())
finally:
# Erase all temporary names to avoid leaking things
# This runs even if loading failed, so a name may be missing (hence the
# KeyError guard); the loop variable itself is removed last.
for _n in ["get_modules", "multiload_globals"]:
try:
del globals()[_n]
except KeyError:
pass
del globals()["_n"]
================================================
FILE: metaflow/extension_support/__init__.py
================================================
from __future__ import print_function
import importlib
import os
import re
import sys
import types
from collections import defaultdict, namedtuple
from importlib.abc import MetaPathFinder, Loader
from itertools import chain
from pathlib import Path
from typing import Any, Dict
from metaflow.meta_files import read_info_file
from metaflow.util import walk_without_cycles
#
# This file provides the support for Metaflow's extension mechanism which allows
# a Metaflow developer to extend metaflow by providing a package `metaflow_extensions`.
# Multiple such packages can be provided, and they will all be loaded into Metaflow in a
# way that is transparent to the user.
#
# NOTE: The conventions used here may change over time and this is an advanced feature.
#
# The general functionality provided here can be divided into three phases:
# - Package discovery: in this part, packages that provide metaflow extensions
# are discovered. This is contained in the `_get_extension_packages` function
# - Integration with Metaflow: throughout the Metaflow code, extension points
# are provided (they are given below in `_extension_points`). At those points,
# the core Metaflow code will invoke functions to load the packages discovered
# in the first phase. These functions are:
# - get_modules: Returns all modules that are contributing to the extension
# point; this is typically done first.
# - load_module: Simple loading of a specific module
# - load_globals: Utility function to load the globals from a module into
# another globals()-like object
# - alias_submodules: Determines the aliases for modules allowing metaflow.Z to alias
# metaflow_extensions.X.Y.Z for example. This supports the __mf_promote_submodules__
# construct as well as aliasing any modules present in the extension. This is
# typically used in conjunction with lazy_load_aliases which takes care of actually
# making the aliasing work lazily (ie: modules that are not already loaded are only
# loaded on use).
# - lazy_load_aliases: Adds loaders for all the module aliases produced by
# alias_submodules for example
# - multiload_globals: Convenience function to `load_globals` on all modules returned
# by `get_modules`
# - multiload_all: Convenience function to `load_globals` and
# `lazy_load_aliases(alias_submodules())` on all modules returned by `get_modules`
# - Packaging the extensions: when extensions need to be included in the code package,
# this allows the extensions to be properly included (including potentially non .py
# files). To support this:
# - dump_module_info dumps information in the INFO file allowing packaging to work
# in a Conda environment or a remote environment (it saves file paths, load order, etc)
# - package_mfext_package: allows the packaging of a single extension
# - package_mfext_all: packages all extensions
#
# The get_aliased_modules function is used by Pylint to ignore some of the errors
# arising from aliasing packages
# Names exported by `from metaflow.extension_support import *`.
__all__ = (
"load_module",
"get_modules",
"dump_module_info",
"get_extensions_in_dir",
"extension_info",
"update_package_info",
"get_aliased_modules",
"package_mfext_package",
"package_mfext_all",
"load_globals",
"alias_submodules",
"EXT_PKG",
"lazy_load_aliases",
"multiload_globals",
"multiload_all",
"_ext_debug",
)
# Name of the top-level package that extensions are provided under.
EXT_PKG = "metaflow_extensions"
# File name patterns: `mfextinit_<name>.py` and `mfextmeta_<name>.py`.
EXT_CONFIG_REGEXP = re.compile(r"^mfextinit_[a-zA-Z0-9_-]+\.py$")
EXT_META_REGEXP = re.compile(r"^mfextmeta_[a-zA-Z0-9_-]+\.py$")
# Captures a leading name made of letters, digits, `.`, `_` and `-` that starts
# and ends with a letter or digit; anything after it is ignored.
REQ_NAME = re.compile(r"^(([a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9])|[a-zA-Z0-9]).*$")
EXT_EXCLUDE_SUFFIXES = [".pyc"]
# Translation table mapping both "." and "-" to "_".
FINDER_TRANS = str.maketrans(".-", "__")
# To get verbose messages, set METAFLOW_DEBUG_EXT to 1
# NOTE(review): the value is the raw environment string, so any non-empty
# value (including "0") is truthy here -- confirm this is intended.
DEBUG_EXT = os.environ.get("METAFLOW_DEBUG_EXT", False)
# This is extracted only from environment variable and here separately from
# metaflow_config to prevent nasty circular dependencies
# An unset variable yields [""] because "".split(os.pathsep) == [""].
EXTENSIONS_SEARCH_DIRS = os.environ.get("METAFLOW_EXTENSIONS_SEARCH_DIRS", "").split(
os.pathsep
)
MFExtPackage = namedtuple("MFExtPackage", "package_name tl_package config_module")
MFExtModule = namedtuple("MFExtModule", "package_name tl_package module")
def load_module(module_name):
    """
    Load `module_name` through `_attempt_load_module` and return its result,
    emitting a debug trace first.
    """
    _ext_debug("Loading module '%s'..." % module_name)
    loaded = _attempt_load_module(module_name)
    return loaded
def get_modules(extension_point):
    """
    Return the configuration modules of all packages contributing to
    `extension_point`.

    Raises
    ------
    RuntimeError
        If `extension_point` is not listed in `_extension_points`.
    """
    if extension_point not in _extension_points:
        raise RuntimeError(
            "Metaflow extension point '%s' not supported" % extension_point
        )

    _ext_debug("Getting modules for extension point '%s'..." % extension_point)
    modules_to_load = []
    contributing_pkgs = _pkgs_per_extension_point.get(extension_point, [])
    for pkg in contributing_pkgs:
        _ext_debug(
            " Found top-level '%s' from '%s'" % (pkg.tl_package, pkg.package_name)
        )
        config = _get_extension_config(
            pkg.package_name, pkg.tl_package, extension_point, pkg.config_module
        )
        # Falsy results are left out.
        if config:
            modules_to_load.append(config)
    _ext_debug(" Loaded %s" % str(modules_to_load))
    return modules_to_load
def dump_module_info(all_packages=None, pkgs_per_extension_point=None):
    """
    Build the `ext_info` entry describing the known extension packages.

    Parameters
    ----------
    all_packages : dict, optional
        Package name -> package description; defaults to `_all_packages`.
    pkgs_per_extension_point : dict, optional
        Defaults to `_pkgs_per_extension_point`; returned as is.

    Returns
    -------
    tuple
        `("ext_info", [sanitized_packages, pkgs_per_extension_point])` where
        each sanitized package has `root_paths` and `full_path_files` set to
        None.
    """
    packages = _all_packages if all_packages is None else all_packages
    if pkgs_per_extension_point is None:
        pkgs_per_extension_point = _pkgs_per_extension_point

    # Strip out root_paths (we don't need it and no need to expose user's dir structure)
    sanitized_all_packages = {
        name: {
            "root_paths": None,
            "meta_module": info["meta_module"],
            "files": info["files"],
            "full_path_files": None,
            "version": info["version"],
            "package_version": info.get("package_version", ""),
            "extension_name": info.get("extension_name", ""),
        }
        for name, info in packages.items()
    }
    return "ext_info", [sanitized_all_packages, pkgs_per_extension_point]
def get_extensions_in_dir(d):
    """
    Return what `_get_extension_packages` finds when restricted to directory
    `d`, ignoring any INFO file.
    """
    return _get_extension_packages(restrict_to_directories=[d], ignore_info_file=True)
def extension_info(packages=None):
    """
    Return information about installed extensions so that it can be stored
    in _graph_info.

    Parameters
    ----------
    packages : dict, optional
        Package name -> package description; defaults to `_all_packages`.

    Returns
    -------
    dict
        `{"installed": {name: {"dist_version", "package_version",
        "extension_name"}}}`.
    """
    source = _all_packages if packages is None else packages
    installed = {}
    for name, info in source.items():
        installed[name] = {
            "dist_version": info["version"],
            "package_version": info.get("package_version", ""),
            "extension_name": info.get("extension_name", ""),
        }
    return {"installed": installed}
def update_package_info(pkg_to_update=None, package_name=None, **kwargs):
    """
    Add new key/value pairs to a package description.

    Parameters
    ----------
    pkg_to_update : dict, optional
        Package description to update. Takes precedence over `package_name`;
        an empty dictionary is a valid target.
    package_name : str, optional
        Name used to look the package up in `_all_packages` when
        `pkg_to_update` is not given.
    **kwargs
        Keys to add. Existing keys are never overwritten.

    Returns
    -------
    dict or None
        The updated package description, or None when no package could be
        resolved and there was nothing to add.

    Raises
    ------
    ValueError
        If a key already exists in the package (nothing is modified in that
        case), or if there are keys to add but no package to add them to.
    """
    if pkg_to_update is not None:
        # Identity check, not truthiness: an empty dict must still be updated.
        pkg = pkg_to_update
    elif package_name:
        pkg = _all_packages.get(package_name)
    else:
        pkg = None

    if pkg is None:
        if kwargs:
            raise ValueError(
                "Cannot update package info: no package found for '%s'" % package_name
            )
        return None

    # Validate every key first so that a conflict leaves the package untouched.
    for k in kwargs:
        if k in pkg:
            raise ValueError(
                "Trying to overwrite existing key '%s' for package %s" % (k, str(pkg))
            )
    for k, v in kwargs.items():
        pkg[k] = v
    return pkg
def get_aliased_modules():
    """Return the module-level list of alias names recorded by `alias_submodules`."""
    return _aliased_modules
def package_mfext_package(package_name):
    """
    Yield `(path_on_disk, path_in_package)` pairs for the files of the
    extension package `package_name`.

    Nothing is yielded when the package is unknown or has no `root_paths`.
    """
    from metaflow.util import to_unicode

    _ext_debug("Packaging '%s'" % package_name)
    pkg_info = _all_packages.get(package_name, None)
    if not pkg_info or not pkg_info.get("root_paths", None):
        return

    if pkg_info["full_path_files"]:
        # Case for initial packaging
        for full_path, short_name in zip(
            pkg_info["full_path_files"], pkg_info["files"]
        ):
            arcname = os.path.join(EXT_PKG, to_unicode(short_name))
            _ext_debug(" Adding '%s' as '%s'" % (full_path, arcname))
            yield full_path, arcname
        return

    # When re-packaging (ie: packaging Metaflow from a Metaflow run):
    # with a single root every listed file is yielded without an existence
    # check; with several roots only files present under a root are yielded.
    single_path = len(pkg_info["root_paths"]) == 1
    for root in pkg_info["root_paths"]:
        root_path = to_unicode(root)
        for file_name in pkg_info["files"]:
            f_unicode = to_unicode(file_name)
            fp = os.path.join(root_path, f_unicode)
            if single_path or os.path.isfile(fp):
                _ext_debug(" Adding '%s'" % fp)
                yield fp, os.path.join(EXT_PKG, f_unicode)
def package_mfext_all():
    """
    Yield `(path_on_disk, path_in_package)` pairs for every known extension
    package, preceded by an `__init__.py` entry when there is at least one
    package.
    """
    # When packaging extensions, we always add a __init__.py to make
    # the packaged metaflow_extensions directory "self-contained" so that
    # python doesn't go and search other parts of the system for more
    # metaflow_extensions.
    if _all_packages:
        this_dir = os.path.dirname(os.path.abspath(__file__))
        empty_file = os.path.join(this_dir, "_empty_file.py")
        yield empty_file, os.path.join(EXT_PKG, "__init__.py")

    for package_name in _all_packages:
        yield from package_mfext_package(package_name)
def package_mfext_all_descriptions():
    """Return the module-level `_all_packages` object itself (not a copy)."""
    return _all_packages
def load_globals(module, dst_globals, extra_indent=False):
    """
    Copy the attributes of `module` into `dst_globals`.

    Names starting with `__` and attributes that are themselves modules are
    left out. `extra_indent` only affects the indentation of debug output.
    """
    indent = " " if extra_indent else ""
    _ext_debug("%sLoading globals from '%s'" % (indent, module.__name__))
    for name, value in module.__dict__.items():
        if name.startswith("__") or isinstance(value, types.ModuleType):
            continue
        _ext_debug("%s Importing '%s'" % (indent, name))
        dst_globals[name] = value
def alias_submodules(module, tl_package, extension_point, extra_indent=False):
    """
    Compute the `metaflow.*` aliases that `module` contributes.

    Two sources are combined: the names listed in the module's
    `__mf_promote_submodules__` (mapped to the dotted name of the target
    module, which is not loaded here) and the submodules already present in
    the module's namespace that belong to `EXT_PKG.<tl_package>` (mapped to
    the module object). All alias names are also appended to
    `_aliased_modules`.

    Returns
    -------
    dict
        Alias name -> dotted module name (str) or module object.
    """
    indent = " " if extra_indent else ""
    aliases = {}
    has_extension_point = extension_point is not None

    _ext_debug("%sAliasing submodules for '%s'" % (indent, module.__name__))

    promoted = module.__dict__.get("__mf_promote_submodules__")
    if promoted:
        # We make an alias for these modules which the extension author wants to
        # expose but since it may not already be loaded, we don't load it either

        # TODO: This does not properly work for multiple packages that overwrite
        # their submodule for example if EXT_PKG.X.datatools.Y is provided
        # by two packages. For now, don't do this.
        for k in promoted:
            if has_extension_point:
                alias = "metaflow.%s.%s" % (extension_point, k)
                target = "%s.%s.%s.%s" % (EXT_PKG, tl_package, extension_point, k)
            else:
                # Top-level "metaflow" overrides
                alias = "metaflow.%s" % k
                target = "%s.%s.%s" % (EXT_PKG, tl_package, k)
            aliases[alias] = target

    if aliases:
        _ext_debug(
            "%s Found explicit promotions in __mf_promote_submodules__: %s"
            % (indent, str(list(aliases.keys())))
        )

    for n, o in module.__dict__.items():
        # NOTE: The condition below prohibits loading across tl_packages. We
        # can relax if needed but may not be a great idea.
        if not (
            isinstance(o, types.ModuleType)
            and o.__package__
            and o.__package__.startswith("%s.%s" % (EXT_PKG, tl_package))
        ):
            continue
        if has_extension_point:
            aliases["metaflow.%s.%s" % (extension_point, n)] = o
        else:
            aliases["metaflow.%s" % n] = o

    _ext_debug(
        "%s Will create the following module aliases: %s"
        % (indent, str(list(aliases.keys())))
    )
    _aliased_modules.extend(aliases.keys())
    return aliases
def lazy_load_aliases(aliases):
    """
    Put a `_LazyFinder` for `aliases` at the front of `sys.meta_path`.
    Does nothing when `aliases` is empty or None.
    """
    if not aliases:
        return
    # Rebinds sys.meta_path to a new list with the finder first.
    finder = _LazyFinder(aliases)
    sys.meta_path = [finder] + sys.meta_path
def multiload_globals(modules, dst_globals):
    """
    Run `load_globals` into `dst_globals` for the `.module` of every entry
    of `modules`, in order.
    """
    for entry in modules:
        load_globals(entry.module, dst_globals, extra_indent=True)
def multiload_all(modules, extension_point, dst_globals):
    """
    For every entry of `modules`, in order: install its aliases
    (`lazy_load_aliases(alias_submodules(...))`) and then copy its globals
    into `dst_globals`.
    """
    for entry in modules:
        # Note that we load aliases separately (as opposed to in one fell swoop) so
        # modules loaded later in `modules` can depend on them
        aliases = alias_submodules(
            entry.module, entry.tl_package, extension_point, extra_indent=True
        )
        lazy_load_aliases(aliases)
        load_globals(entry.module, dst_globals)
# (major, minor) of the running interpreter; selects the metadata backend below.
_py_ver = sys.version_info[:2]
# Alias names collected by alias_submodules(); exposed via get_aliased_modules().
_aliased_modules = []
import importlib.util
# Bind `metadata` to the standard library implementation on Python >= 3.8 and
# to a vendored importlib_metadata on older interpreters.
if _py_ver >= (3, 8):
from importlib import metadata
elif _py_ver >= (3, 7):
from metaflow._vendor.v3_7 import importlib_metadata as metadata
else:
from metaflow._vendor.v3_6 import importlib_metadata as metadata
# Extension points are the directories that can be present in a EXT_PKG to
# contribute to that extension point. For example, if you have
# metaflow_extensions/X/plugins, your extension contributes to the plugins
# extension point.
# IMPORTANT: More specific paths must appear FIRST (before any less specific one). For
# efficiency, put the less specific ones directly under more specific ones.
_extension_points = [
"plugins.env_escape",
"plugins.cards",
"plugins.datatools",
"plugins",
"config",
"exceptions",
"toplevel",
"cmd",
"alias",
]
def _ext_debug(*args, **kwargs):
    """
    Print a debug message to stderr, prefixed with `<EXT_PKG>:`, when
    `DEBUG_EXT` is truthy. Any `file` keyword passed in is overridden.
    """
    if not DEBUG_EXT:
        return
    prefix = "%s:" % EXT_PKG
    kwargs["file"] = sys.stderr
    print(prefix, *args, **kwargs)
def _get_extension_packages(ignore_info_file=False, restrict_to_directories=None):
    """
    Discover every package contributing to EXT_PKG and what each one extends.

    Parameters
    ----------
    ignore_info_file : bool, default False
        When False and the INFO file contains an "ext_info" entry, that
        pre-computed information is returned directly and no discovery is done.
    restrict_to_directories : list of str, optional
        When set, only packages that have one of these directories among their
        parents are considered. When None, EXTENSIONS_SEARCH_DIRS is used unless
        it is equal to [""].

    Returns
    -------
    tuple
        (mf_ext_packages, extension_points_to_pkg):
        - mf_ext_packages maps a distribution name (or a generated
          "_pythonpath_<n>" name) to a dict with the keys "root_paths",
          "meta_module", "full_path_files", "files" and "version".
        - extension_points_to_pkg maps an extension point to the list of
          MFExtPackage in load order.
        ({}, {}) is returned when EXT_PKG cannot be imported because it does
        not exist.

    Raises
    ------
    RuntimeError
        If a package is not an implicit namespace package, defines several meta
        or configuration files, does not use the EXT_PKG.X.<extension point>
        layout, has unresolved dependencies, conflicts with another package on
        a configuration/meta module, or lacks a configuration file for an
        extension point other than "plugins.cards".
    ImportError
        If importing EXT_PKG fails for a reason other than EXT_PKG itself being
        missing (a transitive import error).
    """
    # If we have an INFO file with the appropriate information (if running from a saved
    # code package for example), we use that directly
    info_content = read_info_file()
    if not ignore_info_file and info_content:
        all_pkg, ext_to_pkg = info_content.get("ext_info", (None, None))
        if all_pkg is not None and ext_to_pkg is not None:
            _ext_debug("Loading pre-computed information from INFO file")
            # We need to properly convert stuff in ext_to_pkg: each stored entry is
            # expanded back into an MFExtPackage
            for k, v in ext_to_pkg.items():
                v = [MFExtPackage(*d) for d in v]
                ext_to_pkg[k] = v
            return all_pkg, ext_to_pkg

    # Default the directory restriction to EXTENSIONS_SEARCH_DIRS unless it only
    # contains the empty string
    if restrict_to_directories is None and EXTENSIONS_SEARCH_DIRS != [""]:
        restrict_to_directories = EXTENSIONS_SEARCH_DIRS

    # Check if we even have extensions
    try:
        extensions_module = importlib.import_module(EXT_PKG)
    except ImportError as e:
        # e.name is set to the name of the package that fails to load
        # so don't error ONLY IF the error is importing this module (but do
        # error if there is a transitive import error)
        if not (isinstance(e, ModuleNotFoundError) and e.name == EXT_PKG):
            raise
        return {}, {}

    # Normalize the restriction directories so they can be compared with the
    # resolved POSIX-style paths computed below
    if restrict_to_directories:
        restrict_to_directories = [
            Path(p).resolve().as_posix() for p in restrict_to_directories
        ]

    # There are two "types" of packages:
    #   - those installed on the system (distributions)
    #   - those present in the PYTHONPATH
    # We have more information on distributions (including dependencies) and more
    # effective ways to get file information from them (they include the full list of
    # files installed) so we treat them separately from packages purely in PYTHONPATH.
    # They are also the more likely way that users will have extensions present, so
    # we optimize for that case.

    # At this point, we look at all the paths and create a set. As we find distributions
    # that match it, we will remove from the set and then will be left with any
    # PYTHONPATH "packages"
    all_paths = set()

    # Records which finders provided which paths if applicable
    # This is then later used to determine which paths belong
    # to which distribution
    finders_to_paths = dict()

    # Temporary variables to support the loop below and make sure we loop through all
    # the paths in the submodule_search_locations including calling the path hooks.
    # We could skip calling things on the path hooks since the module was just imported
    # by importlib so the values are probably already in submodule_search_locations but
    # there may be cases where we need to call multiple times. This also allows us to tie
    # the finders (ie: the path hooks) back to the distribution since they share a name.
    # This is useful in knowing which paths we consider as belonging to a distribution so
    # we know which order to load it in.
    seen_path_values = set()
    new_paths = extensions_module.__spec__.submodule_search_locations
    _ext_debug("Found initial paths: %s" % str(new_paths))
    # Worklist loop: entries that are not directories may resolve (through a finder)
    # to further search locations which are processed in the next iteration
    while new_paths:
        paths = new_paths
        new_paths = []
        for p in paths:
            if p in seen_path_values:
                continue
            if os.path.isdir(p):
                all_paths.add(Path(p).resolve().as_posix())
            elif p in sys.path_importer_cache:
                # We have a path hook that we likely need to call to get the actual path
                addl_spec = sys.path_importer_cache[p].find_spec(EXT_PKG)
                if addl_spec is not None and addl_spec.submodule_search_locations:
                    new_paths.extend(addl_spec.submodule_search_locations)
                    # Remove .__path_hook__ (the last 14 characters) and add .py to
                    # match the name of the file installed by the distribution
                    finder_name = p[:-14].translate(FINDER_TRANS) + ".py"
                    new_dirs = [
                        d
                        for d in addl_spec.submodule_search_locations
                        if os.path.isdir(d)
                    ]
                    _ext_debug(
                        "Finder %s added directories %s"
                        % (finder_name, ", ".join(new_dirs))
                    )
                    finders_to_paths.setdefault(finder_name, []).extend(new_dirs)
            else:
                # This may not be as required since it is likely the importer cache has
                # everything already but just in case, we will also go through the
                # path hooks and see if we find another one
                for path_hook in sys.path_hooks:
                    try:
                        finder = path_hook(p)
                        addl_spec = finder.find_spec(EXT_PKG)
                        if (
                            addl_spec is not None
                            and addl_spec.submodule_search_locations
                        ):
                            finder_name = p[:-14].translate(FINDER_TRANS) + ".py"
                            new_dirs = [
                                d
                                for d in addl_spec.submodule_search_locations
                                if os.path.isdir(d)
                            ]
                            _ext_debug(
                                "Finder (through hooks) %s added directories %s"
                                % (finder_name, ", ".join(new_dirs))
                            )
                            finders_to_paths.setdefault(finder_name, []).extend(
                                new_dirs
                            )
                            new_paths.extend(addl_spec.submodule_search_locations)
                            # The first hook that yields search locations wins
                            break
                    except ImportError:
                        # This hook cannot handle p; try the next one
                        continue
            seen_path_values.add(p)

    _ext_debug("Found packages present at %s" % str(all_paths))
    if restrict_to_directories:
        _ext_debug(
            "Processed packages will be restricted to %s" % str(restrict_to_directories)
        )

    # Each extension point split into its path components, in the same order as
    # _extension_points, plus the first component of each
    list_ext_points = [x.split(".") for x in _extension_points]
    init_ext_points = [x[0] for x in list_ext_points]

    # NOTE: For distribution packages, we will rely on requirements to determine the
    # load order of extensions: if distribution A and B both provide EXT_PKG and
    # distribution A depends on B then when returning modules in `get_modules`, we will
    # first return B and THEN A. We may want
    # other ways of specifying "load me after this if it exists" without depending on
    # the package. One way would be to rely on the description and have that info there.
    # Not sure of the use, though, so maybe we can skip for now.

    # Key: distribution name/package path
    # Value: Dict containing:
    #   root_paths: The root path for all the files in this package. Can be a list in
    #               some rare cases
    #   meta_module: The module to the meta file (if any) that contains information about
    #                how to package this extension (suffixes to include/exclude)
    #   files: The list of files to be included (or considered for inclusion) when
    #          packaging this extension
    mf_ext_packages = dict()

    # Key: extension point (one of _extension_point)
    # Value: another dictionary with
    #   Key: distribution name/full path to package
    #   Value: another dictionary with
    #     Key: Top-level package name (so in metaflow_extensions.X...., the X)
    #     Value: MFExtPackage
    extension_points_to_pkg = defaultdict(dict)

    # Key: string: configuration file for a package
    # Value: list: packages that this configuration file is present in
    config_to_pkg = defaultdict(list)
    # Same as config_to_pkg for meta files
    meta_to_pkg = defaultdict(list)

    # The file passed to process_file has EXT_PKG as the first component
    # root_dir also has EXT_PKG as the last component
    def process_file(state: Dict[str, Any], root_dir: str, file: str):
        """
        Record one file of a package and the extension point it contributes to.

        `state` is the per-package accumulator (keys "name", "files",
        "full_path_files" and "meta_module") and is updated in place, as are the
        enclosing meta_to_pkg, config_to_pkg and extension_points_to_pkg.
        `file` is a "/"-separated path starting with EXT_PKG.
        """
        parts = file.split("/")

        if len(parts) > 1 and parts[0] == EXT_PKG:
            # Check for top-level files (ie: meta file which specifies how to package
            # the extension and __init__.py file)
            if len(parts) == 2:
                # Ensure that we don't have a __init__.py to force this package to
                # be a NS package
                if parts[1] == "__init__.py":
                    raise RuntimeError(
                        "Package '%s' providing '%s' is not an implicit namespace "
                        "package as required" % (state["name"], EXT_PKG)
                    )
                # Check for any metadata; we can only have one metadata per
                # distribution at most
                if EXT_META_REGEXP.match(parts[1]) is not None:
                    # [:-3] drops the ".py" suffix to obtain a module name
                    potential_meta_module = ".".join([EXT_PKG, parts[1][:-3]])
                    if state["meta_module"]:
                        raise RuntimeError(
                            "Package '%s' defines more than one meta configuration: "
                            "'%s' and '%s' (at least)"
                            % (
                                state["name"],
                                state["meta_module"],
                                potential_meta_module,
                            )
                        )
                    state["meta_module"] = potential_meta_module
                    _ext_debug(
                        "Found meta '%s' for '%s'"
                        % (state["meta_module"], state["name"])
                    )
                    meta_to_pkg[state["meta_module"]].append(state["name"])

            # Record the file as a candidate for inclusion when packaging if
            # needed
            if not any(parts[-1].endswith(suffix) for suffix in EXT_EXCLUDE_SUFFIXES):
                # Strip out metaflow_extensions from the file
                state["files"].append(os.path.join(*parts[1:]))
                state["full_path_files"].append(os.path.join(root_dir, *parts[1:]))

            if parts[1] in init_ext_points:
                # This is most likely a problem as we need an intermediate
                # "identifier"
                raise RuntimeError(
                    "Package '%s' should conform to '%s.X.%s' and not '%s.%s' where "
                    "X is your organization's name for example"
                    % (
                        state["name"],
                        EXT_PKG,
                        parts[1],
                        EXT_PKG,
                        parts[1],
                    )
                )

        if len(parts) > 3 and parts[0] == EXT_PKG:
            # We go over _extension_points *in order* to make sure we get more
            # specific paths first

            # To give useful errors in case multiple top-level packages in
            # one package
            dist_full_name = "%s[%s]" % (state["name"], parts[1])
            for idx, ext_list in enumerate(list_ext_points):
                if (
                    len(parts) > len(ext_list) + 2
                    and parts[2 : 2 + len(ext_list)] == ext_list
                ):
                    # Check if this is an "init" file
                    config_module = None

                    if len(parts) == len(ext_list) + 3 and (
                        EXT_CONFIG_REGEXP.match(parts[-1]) is not None
                        or parts[-1] == "__init__.py"
                    ):
                        parts[-1] = parts[-1][:-3]  # Remove the .py
                        config_module = ".".join(parts)

                        config_to_pkg[config_module].append(dist_full_name)
                    # Package already recorded (if any) for this extension point,
                    # distribution and top-level package
                    cur_pkg = (
                        extension_points_to_pkg[_extension_points[idx]]
                        .setdefault(state["name"], {})
                        .get(parts[1])
                    )
                    if cur_pkg is not None:
                        if (
                            config_module is not None
                            and cur_pkg.config_module is not None
                        ):
                            raise RuntimeError(
                                "Package '%s' defines more than one "
                                "configuration file for '%s': '%s' and '%s'"
                                % (
                                    dist_full_name,
                                    _extension_points[idx],
                                    config_module,
                                    cur_pkg.config_module,
                                )
                            )
                        if config_module is not None:
                            # Replace the entry recorded without a configuration
                            # module by one that has it
                            _ext_debug(
                                " Top-level '%s' found config file '%s'"
                                % (parts[1], config_module)
                            )
                            extension_points_to_pkg[_extension_points[idx]][
                                state["name"]
                            ][parts[1]] = MFExtPackage(
                                package_name=state["name"],
                                tl_package=parts[1],
                                config_module=config_module,
                            )
                    else:
                        _ext_debug(
                            " Top-level '%s' extends '%s' with config '%s'"
                            % (parts[1], _extension_points[idx], config_module)
                        )
                        extension_points_to_pkg[_extension_points[idx]][state["name"]][
                            parts[1]
                        ] = MFExtPackage(
                            package_name=state["name"],
                            tl_package=parts[1],
                            config_module=config_module,
                        )
                    # Only the first (most specific) matching extension point is used
                    break

    # 1st step: look for distributions (the common case)
    for dist in metadata.distributions():
        if any(
            [pkg == EXT_PKG for pkg in (dist.read_text("top_level.txt") or "").split()]
        ):
            # Note that locate_file does not actually make sure the file exists. It just
            # appends whatever you pass in to locate_file to the folder containing the
            # metadata for the distribution. We will therefore check if we are actually
            # seeing files in that directory using has_file_in_dist_root.
            dist_root = dist.locate_file(EXT_PKG).resolve().as_posix()
            all_roots = []
            has_file_in_dist_root = False
            dist_name = dist.metadata["Name"]
            dist_version = dist.metadata["Version"]
            if restrict_to_directories:
                parent_dirs = list(
                    p.as_posix() for p in Path(dist_root).resolve().parents
                )
                if all(p not in parent_dirs for p in restrict_to_directories):
                    _ext_debug(
                        "Ignoring package at %s as it is not in the considered directories"
                        % dist_root
                    )
                    continue
            if dist_name in mf_ext_packages:
                _ext_debug(
                    "Ignoring duplicate package '%s' (duplicate paths in sys.path? (%s))"
                    % (dist_name, str(sys.path))
                )
                continue
            _ext_debug(
                "Found extension package '%s' at presumptive path '%s'..."
                % (dist_name, dist_root)
            )
            state = {
                "name": dist_name,
                "files": [],
                "full_path_files": [],
                "meta_module": None,  # Meta information about the package (if applicable)
            }
            addl_dirs = []
            # At this point, we check to see what extension points this package
            # contributes to. This is to enable multiple namespace packages to contribute
            # to the same extension point (for example, you may have multiple packages
            # that have plugins)
            for f in dist.files or []:
                if f.suffix == ".pth":
                    # This is a directory we need to walk to find the files
                    d = f.read_text().strip()
                    if os.path.isdir(d):
                        _ext_debug(" Found additional directory '%s' from .pth" % d)
                        addl_dirs.append(d)
                elif str(f).startswith("__editable__"):
                    # This is a finder file because we already checked for .pth
                    _ext_debug(
                        " Added additional directories from finder '%s': %s"
                        % (str(f), ", ".join(finders_to_paths.get(str(f), [])))
                    )
                    addl_dirs.extend(finders_to_paths.get(str(f), []))
                elif f.parts[0] == EXT_PKG:
                    has_file_in_dist_root = True
                    process_file(state, dist_root, str(f))
                else:
                    # We ignore the file
                    continue

            if has_file_in_dist_root:
                all_roots.append(dist_root)
                # This path belongs to a distribution so it is not a PYTHONPATH package
                all_paths.discard(dist_root)
            # Now walk any additional directory for this distribution as well
            for addl_dir in addl_dirs:
                if restrict_to_directories:
                    parent_dirs = list(
                        p.as_posix() for p in Path(addl_dir).resolve().parents
                    )
                    if all(p not in parent_dirs for p in restrict_to_directories):
                        _ext_debug(
                            "Ignoring package at %s as it is not in the considered "
                            "directories" % addl_dir
                        )
                        continue
                base_depth = len(addl_dir.split("/"))
                # .pth files give addl_dirs that don't have EXT_PKG at the end but
                # finders do so check this
                if addl_dir.split("/")[-1] == EXT_PKG:
                    base_depth -= 1
                else:
                    addl_dir = os.path.join(addl_dir, EXT_PKG)
                all_roots.append(addl_dir)
                all_paths.discard(addl_dir)
                _ext_debug(" Walking additional directory '%s'" % addl_dir)
                for root, _, files in walk_without_cycles(addl_dir):
                    # Keep the path components from EXT_PKG onwards so that the file
                    # passed to process_file starts with EXT_PKG
                    relative_root = "/".join(root.split("/")[base_depth:])
                    for f in files:
                        process_file(state, addl_dir, os.path.join(relative_root, f))
            mf_ext_packages[dist_name] = {
                "root_paths": all_roots,
                "meta_module": state["meta_module"],
                "full_path_files": state["full_path_files"],
                "files": state["files"],
                "version": dist_version,
            }
            if addl_dirs:
                # If we have additional directories, this means that we may need to filter
                # the files based on the meta information about the module since we
                # walked down the directories instead of relying simply on files that
                # were packaged with the distribution. We do this now so we don't have to
                # do it multiple times later for packaging. This is only useful if the
                # distribution does not completely specify the files that need to be
                # installed. In the case where the distribution completely specifies the
                # files, we ignore the meta module
                _filter_files_package(mf_ext_packages[dist_name])
    # At this point, we have all the packages that contribute to EXT_PKG,
    # we now check to see if there is an order to respect based on dependencies. We will
    # return an ordered list that respects that order and is ordered alphabetically in
    # case of ties. We do not do any checks because we rely on pip to have done those.
    # Basically topological sort based on dependencies.
    # pkg_to_reqs_count: package -> number of its requirements that are extension
    # packages; req_to_dep: extension package -> packages that require it
    pkg_to_reqs_count = {}
    req_to_dep = {}
    for pkg_name in mf_ext_packages:
        req_count = 0
        req_pkgs = [
            REQ_NAME.match(x).group(1) for x in metadata.requires(pkg_name) or []
        ]
        for req_pkg in req_pkgs:
            if req_pkg in mf_ext_packages:
                req_count += 1
                req_to_dep.setdefault(req_pkg, []).append(pkg_name)
        pkg_to_reqs_count[pkg_name] = req_count

    # Find roots
    mf_pkg_list = []
    to_process = []
    for pkg_name, count in pkg_to_reqs_count.items():
        if count == 0:
            to_process.append(pkg_name)

    # Add them in alphabetical order
    to_process.sort()
    mf_pkg_list.extend(to_process)
    # Find rest topologically
    while to_process:
        next_round = []
        for pkg_name in to_process:
            del pkg_to_reqs_count[pkg_name]
            for dep in req_to_dep.get(pkg_name, []):
                cur_req_count = pkg_to_reqs_count[dep]
                if cur_req_count == 1:
                    # Last outstanding requirement: dep is ready for the next round
                    next_round.append(dep)
                else:
                    pkg_to_reqs_count[dep] = cur_req_count - 1
        # Add those in alphabetical order
        next_round.sort()
        mf_pkg_list.extend(next_round)
        to_process = next_round

    # Check that we got them all
    if len(pkg_to_reqs_count) > 0:
        raise RuntimeError(
            "Unresolved dependencies in '%s': %s"
            % (EXT_PKG, ", and ".join("'%s'" % p for p in pkg_to_reqs_count))
        )

    _ext_debug("'%s' distributions order is %s" % (EXT_PKG, str(mf_pkg_list)))

    # We check if we have any additional packages that were not yet installed that
    # we need to use. We always put them *last* in the load order and put them
    # alphabetically.
    all_paths_list = list(all_paths)
    all_paths_list.sort()

    # This block of code is the equivalent of the one above for distributions except
    # for PYTHONPATH packages.
    package_name_to_path = dict()
    if len(all_paths_list) > 0:
        _ext_debug("Non installed packages present at %s" % str(all_paths))
        for package_count, package_path in enumerate(all_paths_list):
            if restrict_to_directories:
                parent_dirs = list(
                    p.as_posix() for p in Path(package_path).resolve().parents
                )
                if all(p not in parent_dirs for p in restrict_to_directories):
                    _ext_debug(
                        "Ignoring non-installed package at %s as it is not in "
                        "the considered directories" % package_path
                    )
                    continue
            # We give an alternate name for the visible package name. It is
            # not exposed to the end user but used to refer to the package, and it
            # doesn't provide much additional information to have the full path
            # particularly when it is on a remote machine.
            # We keep a temporary mapping around for error messages while loading for
            # the first time.
            package_name = "_pythonpath_%d" % package_count
            _ext_debug(
                "Walking path %s (package name %s)" % (package_path, package_name)
            )
            package_name_to_path[package_name] = package_path
            base_depth = len(package_path.split("/"))
            state = {
                "name": package_name,
                "files": [],
                "full_path_files": [],
                "meta_module": None,
            }
            for root, _, files in walk_without_cycles(package_path):
                # base_depth - 1 keeps the last component of package_path so that the
                # file passed to process_file starts with it
                relative_root = "/".join(root.split("/")[base_depth - 1 :])
                for f in files:
                    process_file(state, package_path, os.path.join(relative_root, f))

            if state["files"]:
                mf_pkg_list.append(package_name)
                mf_ext_packages[package_name] = {
                    "root_paths": [package_path],
                    "meta_module": state["meta_module"],
                    "full_path_files": state["full_path_files"],
                    "files": state["files"],
                    "version": "_local_",
                }
                # Always filter here since we don't have any distribution information
                _filter_files_package(mf_ext_packages[package_name])
            else:
                _ext_debug("Skipping package as no files found (empty dir?)")

    # Sanity check that we only have one package per configuration file.
    # This prevents multiple packages from providing the same named configuration
    # file which would result in one overwriting the other if they are both installed.
    errors = []
    for m, packages in config_to_pkg.items():
        if len(packages) > 1:
            errors.append(
                " Packages %s define the same configuration module '%s'"
                % (", and ".join(["'%s'" % p for p in packages]), m)
            )
    for m, packages in meta_to_pkg.items():
        if len(packages) > 1:
            errors.append(
                " Packages %s define the same meta module '%s'"
                % (", and ".join(["'%s'" % p for p in packages]), m)
            )
    if errors:
        raise RuntimeError(
            "Conflicts in '%s' files:\n%s" % (EXT_PKG, "\n".join(errors))
        )

    # From now on, looking up an unknown extension point raises KeyError instead of
    # silently creating an entry
    extension_points_to_pkg.default_factory = None

    # We have the load order globally; we now figure it out per extension point.
    for k, v in extension_points_to_pkg.items():
        # v is a dict distributionName/packagePath -> (dict tl_name -> MFPackage)
        l = [v[pkg].values() for pkg in mf_pkg_list if pkg in v]
        # In the case of the plugins.cards extension we allow those packages
        # to be ns packages, so we only list the package once (in its first position).
        # In all other cases, we error out if we don't have a configuration file for the
        # package (either a __init__.py of an explicit mfextinit_*.py)
        final_list = []
        null_config_tl_package = set()
        for pkg in chain(*l):
            if pkg.config_module is None:
                if k == "plugins.cards":
                    # This is allowed here but we only keep one
                    if pkg.tl_package in null_config_tl_package:
                        continue
                    null_config_tl_package.add(pkg.tl_package)
                else:
                    # Prefer the on-disk path for PYTHONPATH packages since their
                    # generated name is not meaningful to the user
                    package_path = package_name_to_path.get(pkg.package_name)
                    if package_path:
                        package_path = "at '%s'" % package_path
                    else:
                        package_path = "'%s'" % pkg.package_name
                    raise RuntimeError(
                        "Package %s does not define a configuration file for '%s'"
                        % (package_path, k)
                    )
            final_list.append(pkg)
        extension_points_to_pkg[k] = final_list
    return mf_ext_packages, extension_points_to_pkg
def _attempt_load_module(module_name):
    """
    Import `module_name`, tolerating the case where it simply does not exist.

    Parameters
    ----------
    module_name : str
        Dotted name of the module to import; its first component is assumed to
        stand for EXT_PKG.

    Returns
    -------
    module or None
        The imported module, or None when the import failed because the module
        itself (or one of its parents under EXT_PKG) could not be found.

    Raises
    ------
    ImportError
        Re-raised, after printing a message, for any other import failure (for
        example a missing transitive dependency).
    """
    try:
        return importlib.import_module(module_name)
    except ImportError as e:
        # e.name is the name of the module that failed to load. The failure is
        # tolerated only when that name is EXT_PKG or one of the dotted prefixes
        # of the requested module.
        tolerated_names = [EXT_PKG]
        for component in module_name.split(".")[1:]:
            tolerated_names.append("%s.%s" % (tolerated_names[-1], component))
        module_is_absent = (
            isinstance(e, ModuleNotFoundError) and e.name in tolerated_names
        )
        if not module_is_absent:
            print(
                "The following exception occurred while trying to load '%s' ('%s')"
                % (EXT_PKG, module_name)
            )
            raise
        _ext_debug(" Unknown error when loading '%s': %s" % (module_name, e))
        return None
def _filter_files_package(pkg):
    """
    Filter, in place, the files of a package according to its meta module.

    Parameters
    ----------
    pkg : dict
        Package description with the keys "root_paths", "meta_module", "files"
        and "full_path_files". "files" and "full_path_files" are parallel lists
        and are replaced by their filtered versions.

    Nothing happens when `pkg` is empty, has no root paths, has no meta module
    or when the meta module cannot be loaded. Otherwise the first of the
    following defined by the meta module is applied:
      - filter_function: keep files for which it returns a true value. It is
        always called with a path starting with EXT_PKG; a file for which it
        raises is dropped.
      - include_suffixes: keep only files ending with one of the suffixes.
      - exclude_suffixes: keep everything except files ending with one of them.
    If none is defined, all files are kept.
    """
    if not (pkg and pkg["root_paths"] and pkg["meta_module"]):
        return
    meta_module = _attempt_load_module(pkg["meta_module"])
    if not meta_module:
        return

    meta_settings = meta_module.__dict__
    filter_function = meta_settings.get("filter_function")
    include_suffixes = meta_settings.get("include_suffixes")
    exclude_suffixes = meta_settings.get("exclude_suffixes")

    if filter_function:

        def keep(short_file):
            try:
                return filter_function(os.path.join(EXT_PKG, short_file))
            except Exception as e:
                _ext_debug(
                    " Exception '%s' when calling filter_function on "
                    "'%s', ignoring file" % (e, short_file)
                )
                return False

    elif include_suffixes:

        def keep(short_file):
            return any([short_file.endswith(suffix) for suffix in include_suffixes])

    elif exclude_suffixes:

        def keep(short_file):
            return not any(
                [short_file.endswith(suffix) for suffix in exclude_suffixes]
            )

    else:
        # Nothing specified: every file is included so the lists stay as they are
        return

    kept_pairs = [
        (short_file, full_file)
        for short_file, full_file in zip(pkg["files"], pkg["full_path_files"])
        if keep(short_file)
    ]
    pkg["files"] = [short_file for short_file, _ in kept_pairs]
    pkg["full_path_files"] = [full_file for _, full_file in kept_pairs]
# Discover the extension packages once, when this module is imported.
# _all_packages maps a package name to its description (root paths, files, ...)
# and _pkgs_per_extension_point maps an extension point to its ordered packages.
_all_packages, _pkgs_per_extension_point = _get_extension_packages()
def _get_extension_config(distribution_name, tl_pkg, extension_point, config_module):
    """
    Load the configuration module of a package for one extension point.

    Parameters
    ----------
    distribution_name : str
        Key of the package in `_all_packages`.
    tl_pkg : str
        Top-level package name (the X in EXT_PKG.X.<extension point>).
    extension_point : str
        Extension point being loaded.
    config_module : str or None
        Dotted name of the explicit configuration module. When None, or when it
        ends with "__init__", the package EXT_PKG.<tl_pkg>.<extension_point> is
        loaded instead.

    Returns
    -------
    MFExtModule or None
        The loaded module wrapped with its package information, or None when
        the module could not be loaded.

    Side effects
    ------------
    When the package has no recorded "root_paths" (None), they are derived from
    the location of the loaded module and stored in `_all_packages`.
    """
    if config_module is not None and not config_module.endswith("__init__"):
        module_name = config_module
        # file_path below will be /root/metaflow_extensions/X/Y/mfextinit_Z.py and
        # module name is metaflow_extensions.X.Y.mfextinit_Z so if we want to strip to
        # /root/metaflow_extensions, we need to remove this number of elements from the
        # filepath
        strip_from_filepath = len(module_name.split(".")) - 1
    else:
        module_name = ".".join([EXT_PKG, tl_pkg, extension_point])
        # file_path here will be /root/metaflow_extensions/X/Y/__init__.py BUT
        # module name is metaflow_extensions.X.Y so we have a 1 off compared to the
        # previous case
        strip_from_filepath = len(module_name.split("."))

    _ext_debug(" Attempting to load '%s'" % module_name)

    extension_module = _attempt_load_module(module_name)
    if not extension_module:
        return None

    # We update the path to this module. This is useful if we need to package this
    # package again. Note that in most cases, packaging happens in the outermost
    # local python environment (non Conda and not remote) so we already have the
    # root_paths set when we are initially looking for metaflow_extensions package.
    # This code allows for packaging while running inside a Conda environment or
    # remotely where the root_paths has been changed since the initial packaging.
    # This currently does not happen much.
    if _all_packages[distribution_name]["root_paths"] is None:
        # A namespace package has no usable __file__: the attribute is None on
        # recent interpreters and absent altogether on older ones. The default
        # makes both cases reach the __path__ based fallback instead of raising
        # AttributeError.
        file_path = getattr(extension_module, "__file__", None)
        if file_path:
            # Common case where this is an actual init file (mfextinit_X.py or __init__.py)
            root_paths = ["/".join(file_path.split("/")[:-strip_from_filepath])]
        else:
            # Only used for plugins.cards where the package can be a NS package. In
            # this case, __path__ will have things like /root/metaflow_extensions/X/Y
            # and module name will be metaflow_extensions.X.Y
            root_paths = [
                "/".join(p.split("/")[: -len(module_name.split(".")) + 1])
                for p in extension_module.__path__
            ]

        _ext_debug("Package '%s' is rooted at %s" % (distribution_name, root_paths))
        _all_packages[distribution_name]["root_paths"] = root_paths

    return MFExtModule(
        package_name=distribution_name, tl_package=tl_pkg, module=extension_module
    )
class _AliasLoader(Loader):
    """
    Loader that makes a module available under an alias name.

    Parameters
    ----------
    alias : str
        Name under which the module is being imported.
    orig : str or module
        Either the dotted name of the module to import or the module itself.
    """

    def __init__(self, alias, orig):
        self._alias, self._orig = alias, orig

    def create_module(self, spec):
        """
        Return the module the alias stands for.

        A module object is returned as is and a string is imported; anything
        else is delegated to the default `Loader.create_module`.

        Raises
        ------
        ImportError
            If the aliased module name cannot be imported.
        """
        target = self._orig
        _ext_debug("Loading aliased module '%s' at '%s' " % (str(target), spec.name))
        if isinstance(target, types.ModuleType):
            # We are aliasing a module, so we just return that one
            return target
        if not isinstance(target, str):
            return super().create_module(spec)
        try:
            return importlib.import_module(target)
        except ImportError:
            raise ImportError(
                "No module found '%s' (aliasing '%s')" % (spec.name, target)
            )

    def exec_module(self, module):
        """
        Rename `module` to the alias, remembering its first name.

        The original name is stored once in `__orig_name__` so that it can be
        used when submodules are loaded.
        """
        if not hasattr(module, "__orig_name__"):
            module.__orig_name__ = module.__name__
        module.__name__ = self._alias
class _OrigLoader(Loader):
    """
    Loader wrapper that loads an original (shadowed) module under another name.

    Parameters
    ----------
    fullname : str
        Name under which the original module must end up in `sys.modules`.
    orig_loader : Loader
        Loader of the original module; it does the actual work.
    previously_loaded_module : module, optional
        Module to put back in `sys.modules` under the original name once the
        original module has been executed.
    previously_loaded_parent_module : module, optional
        Module to put back in `sys.modules` under the name of the parent of the
        original module.
    """

    def __init__(
        self,
        fullname,
        orig_loader,
        previously_loaded_module=None,
        previously_loaded_parent_module=None,
    ):
        self._fullname = fullname
        self._orig_loader = orig_loader
        self._previously_loaded_module = previously_loaded_module
        self._previously_loaded_parent_module = previously_loaded_parent_module

    def create_module(self, spec):
        """Create the module with the original loader, recording its original name."""
        self._orig_name = spec.name
        _ext_debug(
            "Loading original module '%s' (will be loaded at '%s'); spec is %s"
            % (self._orig_name, self._fullname, str(spec))
        )
        return self._orig_loader.create_module(spec)

    def exec_module(self, module):
        """
        Execute the original module and move it to its new name.

        Any exception raised while executing the module propagates unchanged.
        In every case the previously loaded module and parent module (if any)
        are put back in `sys.modules`.
        """
        new_name = self._fullname
        try:
            # Perform all actions of the original loader; the statements after it
            # only run when this did not raise
            self._orig_loader.exec_module(module)
            # It loaded, we move and rename appropriately
            module.__spec__.name = new_name
            module.__orig_name__ = module.__name__
            module.__name__ = new_name
            module.__package__ = module.__spec__.parent  # assumption since 3.6
            sys.modules[new_name] = module
            del sys.modules[self._orig_name]
        finally:
            # At this point, the original module is loaded with the original name. We
            # want to replace it with previously_loaded_module if it exists. We
            # also replace the parent properly
            shadowing_module = self._previously_loaded_module
            if shadowing_module:
                sys.modules[self._orig_name] = shadowing_module
            shadowing_parent = self._previously_loaded_parent_module
            if shadowing_parent:
                parent_key = ".".join(self._orig_name.split(".")[:-1])
                sys.modules[parent_key] = shadowing_parent
class _LazyFinder(MetaPathFinder):
    """
    Meta path finder resolving aliased modules and their shadowed originals.

    This _LazyFinder implements the Importer Protocol defined in PEP 302.

    Parameters
    ----------
    handled : dict or None
        Key: name of the module to handle.
        Value: either a string (the name of the module to load) or a module
        (the module to use directly).
    """

    def __init__(self, handled):
        # Dictionary:
        #  Key: name of the module to handle
        #  Value:
        #   - A string: a pathspec to the module to load
        #   - A module: the module to load
        self._handled = handled if handled else {}

        # This is used to revert to regular loading when trying to load
        # the over-ridden module
        self._temp_excluded_prefix = set()

        # This is used to determine if we should be searching in _orig modules. Basically,
        # when a relative import is done from a module in _orig, we want to search in
        # the _orig "tree"
        self._orig_search_paths = set()

    def find_spec(self, fullname, path, target=None):
        """
        Return a spec for `fullname` if this finder is responsible for it.

        Handled, in order:
          1. names directly present in the handled dictionary (aliased);
          2. `<handled>._orig[.<sub>]`: the original module shadowed by a
             handled name, or one of its submodules;
          3. modules searched from a path that belongs to an original package;
          4. submodules of a handled name.

        Returns None when the name is not handled or when the original module
        cannot be found.
        """
        # While we look for an original module, we do not handle anything under
        # its prefix so that regular loading takes place
        if any(fullname.startswith(e) for e in self._temp_excluded_prefix):
            return None

        # If this is something we directly handle, return our loader
        if fullname in self._handled:
            return importlib.util.spec_from_loader(
                fullname, _AliasLoader(fullname, self._handled[fullname])
            )

        # For the first pass when we try to load a shadowed module, we send it back
        # without the ._orig and that will find the original spec of the module
        # Note that we handle mymodule._orig.orig_submodule as well as mymodule._orig.
        # Basically, the original module and any of the original submodules are
        # available under _orig.
        name_parts = fullname.split(".")
        try:
            orig_idx = name_parts.index("_orig")
        except ValueError:
            orig_idx = -1
        if orig_idx > -1 and ".".join(name_parts[:orig_idx]) in self._handled:
            orig_name = ".".join(name_parts[:orig_idx] + name_parts[orig_idx + 1 :])
            parent_name = None
            if orig_idx != len(name_parts) - 1:
                # We have a parent module under the _orig portion so for example, if
                # we load mymodule._orig.orig_submodule, our parent is mymodule._orig.
                # However, since mymodule is currently shadowed, we need to reset
                # the parent module properly. We know it is already loaded (since modules
                # are loaded hierarchically)
                parent_name = ".".join(
                    name_parts[:orig_idx] + name_parts[orig_idx + 1 : -1]
                )
            _ext_debug("Looking for original module '%s'" % orig_name)
            prefix = ".".join(name_parts[:orig_idx])
            self._temp_excluded_prefix.add(prefix)
            loaded_module = None
            parent_module = None
            spec = None
            try:
                # We also have to remove the module temporarily while we look for the
                # new spec since otherwise it returns the spec of that loaded module.
                # module is also restored *after* we call `create_module` in the loader
                # otherwise it just returns None. We also swap out the parent module so
                # that the search can start from there.
                loaded_module = sys.modules.get(orig_name)
                if loaded_module:
                    del sys.modules[orig_name]
                parent_module = sys.modules.get(parent_name) if parent_name else None
                if parent_module:
                    sys.modules[parent_name] = sys.modules[
                        ".".join([parent_name, "_orig"])
                    ]
                # This finds the spec that would have existed had we not added all our
                # _LazyFinders
                spec = importlib.util.find_spec(orig_name)
            finally:
                # Always stop excluding the prefix, even if the search raised;
                # otherwise this finder would ignore everything under it from now on
                self._temp_excluded_prefix.discard(prefix)
                if not spec:
                    # No loader will run to put things back, so undo the temporary
                    # changes made to sys.modules here
                    if loaded_module:
                        sys.modules[orig_name] = loaded_module
                    if parent_module:
                        sys.modules[parent_name] = parent_module

            if not spec:
                return None

            if spec.submodule_search_locations:
                self._orig_search_paths.update(spec.submodule_search_locations)

            _ext_debug("Found original spec %s" % spec)

            # Change the spec: the wrapping loader renames the module once loaded
            # and restores the modules temporarily removed from sys.modules above
            spec.loader = _OrigLoader(
                fullname,
                spec.loader,
                loaded_module,
                parent_module,
            )

            return spec

        for p in path or []:
            if p in self._orig_search_paths:
                # We need to look in some of the "_orig" modules
                orig_override_name = ".".join(
                    name_parts[:-1] + ["_orig", name_parts[-1]]
                )
                _ext_debug(
                    "Looking for %s as an original module: searching for %s"
                    % (fullname, orig_override_name)
                )
                return importlib.util.find_spec(orig_override_name)
        if len(name_parts) > 1:
            # This checks for submodules of things we handle. We check for the most
            # specific submodule match and use that
            chop_idx = 1
            while chop_idx < len(name_parts):
                parent_name = ".".join(name_parts[:-chop_idx])
                if parent_name in self._handled:
                    orig = self._handled[parent_name]
                    if isinstance(orig, types.ModuleType):
                        # Use the name the module had before it was aliased
                        orig_name = ".".join(
                            [orig.__orig_name__] + name_parts[-chop_idx:]
                        )
                    else:
                        orig_name = ".".join([orig] + name_parts[-chop_idx:])
                    return importlib.util.spec_from_loader(
                        fullname, _AliasLoader(fullname, orig_name)
                    )
                chop_idx += 1
        return None
================================================
FILE: metaflow/extension_support/_empty_file.py
================================================
# This file serves as a __init__.py for metaflow_extensions or metaflow
# packages when they are packaged and needs to remain empty.
================================================
FILE: metaflow/extension_support/cmd.py
================================================
import importlib
import traceback
from metaflow.metaflow_config_funcs import from_conf
from . import _ext_debug, get_modules
# (name, class_path) tuples of all the commands found so far, in load order
_all_cmds = []
# Command name -> class path; the latest definition of a name wins
_all_cmds_dict = {}

# Set ENABLED_ and _TOGGLE_ variables for commands
ENABLED_CMD = from_conf("ENABLED_CMD")
_TOGGLE_CMD = []

# This file is identical in functionality to the plugins.py file. Please refer to that
# one for more information on what the functions do.
def process_cmds(module_globals):
    """
    Gather the command descriptions from `module_globals` and from every "cmd"
    extension module, then settle the value of ENABLED_CMD.

    Parameters
    ----------
    module_globals : dict
        Globals of the module declaring the core commands; its "CMDS_DESC" entry
        is rewritten so that relative class paths become absolute.
    """
    global _all_cmds, _all_cmds_dict, ENABLED_CMD, _TOGGLE_CMD

    _resolve_relative_paths(module_globals)
    _all_cmds = _get_ext_cmds(module_globals)

    try:
        # Comparable to multiload_all, except that each extension's globals are
        # inspected separately since only TOGGLE_CMD and ENABLED_CMD matter here.
        for ext in get_modules("cmd"):
            ext_globals = ext.module.__dict__
            for attr_name, attr_value in ext_globals.items():
                if attr_name == "TOGGLE_CMD":
                    _TOGGLE_CMD.extend(attr_value)
                elif attr_name == "ENABLED_CMD":
                    ENABLED_CMD = attr_value
            _resolve_relative_paths(ext_globals)
            _all_cmds.extend(_get_ext_cmds(ext_globals))
    except Exception as e:
        _ext_debug("\tWARNING: ignoring all cmds due to error during import: %s" % e)
        print(
            "WARNING: Cmds did not load -- ignoring all of them which may not "
            "be what you want: %s" % e
        )
        traceback.print_exc()

    # _all_cmds now holds the (name, class_path) tuples of the core commands followed
    # by those of the extensions. Later entries win when a name appears more than
    # once, which lets extensions override Metaflow core.
    for cmd_name, cmd_path in _all_cmds:
        _ext_debug(" Adding command '%s' from '%s'" % (cmd_name, cmd_path))
        _all_cmds_dict[cmd_name] = cmd_path

    # Settle ENABLED_CMD:
    #  - a non-None value was set explicitly (configuration file, command line or an
    #    extension); it is honored as is and the extensions' toggles are ignored.
    #  - None means nothing was configured: every known command is enabled and the
    #    TOGGLE_ entries are appended to produce the final list.
    # This supports both an explicit list of enabled commands and a
    # "configuration-less" mode where installing an extension activates it.
    if ENABLED_CMD is None:
        ENABLED_CMD = list(_all_cmds_dict) + _TOGGLE_CMD
def resolve_cmds():
    """
    Turn the names listed in ENABLED_CMD into the classes implementing them.

    Entries of ENABLED_CMD are processed in order: a "-name" entry removes `name`
    from the selection, while "+name" and "name" add it.

    Returns
    -------
    list
        One class per selected command.

    Raises
    ------
    ValueError
        If a selected command is unknown, its module cannot be imported, its class
        is missing, or the class does not define exactly one command whose name
        matches the requested one.
    """
    _ext_debug(" Resolving metaflow commands")
    list_of_cmds = ENABLED_CMD
    _ext_debug(" Raw list is: %s" % str(list_of_cmds))

    set_of_commands = set()
    for p in list_of_cmds:
        if p.startswith("-"):
            set_of_commands.discard(p[1:])
        elif p.startswith("+"):
            set_of_commands.add(p[1:])
        else:
            set_of_commands.add(p)
    _ext_debug(" Resolved list is: %s" % str(set_of_commands))

    to_return = []
    for name in set_of_commands:
        class_path = _all_cmds_dict.get(name, None)
        if class_path is None:
            raise ValueError(
                "Configuration requested command '%s' but no such command is available"
                % name
            )
        path, cls_name = class_path.rsplit(".", 1)
        try:
            cmd_module = importlib.import_module(path)
        except ImportError as e:
            # Chain explicitly so the underlying import failure stays attached
            raise ValueError(
                "Cannot locate command '%s' at '%s'" % (name, path)
            ) from e
        cls = getattr(cmd_module, cls_name, None)
        if cls is None:
            raise ValueError(
                "Cannot locate '%s' class for command at '%s'" % (cls_name, path)
            )
        all_cmds = list(cls.commands)
        if not all_cmds:
            # Without this check, all_cmds[0] below fails with a bare IndexError
            raise ValueError("%s does not define any command" % path)
        if len(all_cmds) > 1:
            raise ValueError("%s defines more than one command -- use a group" % path)
        if all_cmds[0] != name:
            raise ValueError(
                "%s: expected name to be '%s' but got '%s' instead"
                % (path, name, all_cmds[0])
            )
        to_return.append(cls)
        _ext_debug(" Added command '%s' from '%s'" % (name, class_path))
    return to_return
def _get_ext_cmds(module_globals):
return module_globals.get("CMDS_DESC", [])
def _set_ext_cmds(module_globals, value):
module_globals["CMDS_DESC"] = value
def _resolve_relative_paths(module_globals):
# We want to modify all the relevant lists so that the relative paths
# are made fully qualified paths for the modules
pkg_path = module_globals["__package__"]
pkg_components = pkg_path.split(".")
def resolve_path(class_path):
# Converts a relative class_path to an absolute one considering that the
# relative class_path is present in a package pkg_path
if class_path[0] == ".":
i = 1
# Check for multiple "." at the start of the class_path
while class_path[i] == ".":
i += 1
if i > len(pkg_components):
raise ValueError(
"Path '%s' exits out of Metaflow module at %s"
% (class_path, pkg_path)
)
return (
".".join(pkg_components[: -i + 1] if i > 1 else pkg_components)
+ class_path[i - 1 :]
)
return class_path
_set_ext_cmds(
module_globals,
list(map(lambda p: (p[0], resolve_path(p[1])), _get_ext_cmds(module_globals))),
)
================================================
FILE: metaflow/extension_support/integrations.py
================================================
import importlib
import traceback
from metaflow.metaflow_config_funcs import from_conf
from . import _ext_debug, get_modules
# This file is similar in functionality to the cmd.py file. Please refer to that
# one for more information on what the functions do.
def process_integration_aliases(module_globals):
    """
    Collect the integration aliases declared in `module_globals` and in every
    "alias" extension module, decide which ones are enabled and bind each enabled
    alias as a name in `module_globals`.

    Raises
    ------
    ValueError
        If an enabled alias is unknown, its module cannot be imported or the
        object it points to is missing.
    """
    _resolve_relative_paths(module_globals)
    all_aliases = _get_ext_aliases(module_globals)
    all_aliases_dict = {}
    toggle_alias = []
    list_of_aliases = from_conf("ENABLED_INTEGRATION_ALIAS")

    try:
        # Comparable to multiload_all, except that each extension's globals are
        # inspected separately since only the TOGGLE and ENABLED values matter.
        for ext in get_modules("alias"):
            ext_globals = ext.module.__dict__
            for attr_name, attr_value in ext_globals.items():
                if attr_name == "TOGGLE_INTEGRATION_ALIAS":
                    toggle_alias.extend(attr_value)
                elif attr_name == "ENABLED_INTEGRATION_ALIAS":
                    list_of_aliases = attr_value
            _resolve_relative_paths(ext_globals)
            all_aliases.extend(_get_ext_aliases(ext_globals))
    except Exception as e:
        _ext_debug(
            "\tWARNING: ignoring all integration aliases due to error during import: %s"
            % e
        )
        print(
            "WARNING: Integration aliases did not load -- ignoring all of them which "
            "may not be what you want: %s" % e
        )
        traceback.print_exc()

    # all_aliases now holds the (name, object_path) tuples of the core aliases
    # followed by those of the extensions. Later entries win when a name appears
    # more than once, which lets extensions override Metaflow core.
    for alias_name, target_path in all_aliases:
        _ext_debug(
            " Adding integration alias '%s' from '%s'" % (alias_name, target_path)
        )
        all_aliases_dict[alias_name] = target_path

    # Settle the list of enabled aliases:
    #  - a non-None value was set explicitly (configuration file, command line or an
    #    extension); it is honored as is and the extensions' toggles are ignored.
    #  - None means nothing was configured: every known alias is enabled and the
    #    TOGGLE_ entries are appended to produce the final list.
    # This supports both an explicit list of enabled aliases and a
    # "configuration-less" mode where installing an extension activates it.
    if list_of_aliases is None:
        list_of_aliases = list(all_aliases_dict) + toggle_alias

    _ext_debug(" Resolving metaflow integration aliases")
    _ext_debug(" Raw list is: %s" % str(list_of_aliases))

    set_of_aliases = set()
    for entry in list_of_aliases:
        if entry.startswith("-"):
            set_of_aliases.discard(entry[1:])
            continue
        set_of_aliases.add(entry[1:] if entry.startswith("+") else entry)
    _ext_debug(" Resolved list is: %s" % str(set_of_aliases))

    for alias_name in set_of_aliases:
        target_path = all_aliases_dict.get(alias_name)
        if target_path is None:
            raise ValueError(
                "Configuration requested integration alias '%s' but no such alias "
                "is available" % alias_name
            )
        module_path, obj_name = target_path.rsplit(".", 1)
        try:
            alias_module = importlib.import_module(module_path)
        except ImportError:
            raise ValueError(
                "Cannot locate integration alias '%s' at '%s'"
                % (alias_name, module_path)
            )
        target = getattr(alias_module, obj_name, None)
        if target is None:
            raise ValueError(
                "Cannot locate '%s' object for integration alias at '%s'"
                % (obj_name, module_path)
            )
        _ext_debug(
            " Added integration alias '%s' from '%s'" % (alias_name, target_path)
        )
        module_globals[alias_name] = target
def _get_ext_aliases(module_globals):
return module_globals.get("ALIASES_DESC", [])
def _set_ext_aliases(module_globals, value):
module_globals["ALIASES_DESC"] = value
def _resolve_relative_paths(module_globals):
# We want to modify all the relevant lists so that the relative paths
# are made fully qualified paths for the modules
pkg_path = module_globals["__package__"]
pkg_components = pkg_path.split(".")
def resolve_path(class_path):
# Converts a relative class_path to an absolute one considering that the
# relative class_path is present in a package pkg_path
if class_path[0] == ".":
i = 1
# Check for multiple "." at the start of the class_path
while class_path[i] == ".":
i += 1
if i > len(pkg_components):
raise ValueError(
"Path '%s' exits out of Metaflow module at %s"
% (class_path, pkg_path)
)
return (
".".join(pkg_components[: -i + 1] if i > 1 else pkg_components)
+ class_path[i - 1 :]
)
return class_path
_set_ext_aliases(
module_globals,
list(
map(lambda p: (p[0], resolve_path(p[1])), _get_ext_aliases(module_globals))
),
)
================================================
FILE: metaflow/extension_support/plugins.py
================================================
import importlib
import traceback
from metaflow.metaflow_config_funcs import from_conf
from . import _ext_debug, alias_submodules, get_modules, lazy_load_aliases
def process_plugins(module_globals):
    """
    Gather the plugin descriptions of every category from `module_globals` and
    from every "plugins" extension module, then settle the ENABLED_* variables.

    The results are stored as globals of this module: for each category, the list
    named by _list_for_category, the dictionary named by _dict_for_category and the
    ENABLED_<CATEGORY> / _TOGGLE_<CATEGORY> variables.
    """
    _resolve_relative_paths(module_globals)
    mod_state = globals()

    # ENABLED_* is read through from_conf, _TOGGLE_* starts as an empty list that
    # extensions append to, and the list of available plugins starts with what
    # `module_globals` (Metaflow core) declares.
    for category in _plugin_categories:
        enabled_key = "ENABLED_%s" % category.upper()
        mod_state[enabled_key] = from_conf(enabled_key)
        mod_state["_TOGGLE_%s" % category.upper()] = []
        mod_state[_list_for_category(category)] = _get_ext_plugins(
            module_globals, category
        )

    try:
        # Comparable to multiload_all, except that each extension's globals are
        # inspected separately since only the TOGGLE and ENABLED values matter.
        for ext in get_modules("plugins"):
            lazy_load_aliases(
                alias_submodules(
                    ext.module, ext.tl_package, "plugins", extra_indent=True
                )
            )
            ext_globals = ext.module.__dict__
            for attr_name, attr_value in ext_globals.items():
                if (
                    attr_name.startswith("TOGGLE_")
                    and attr_name[7:].lower() in _plugin_categories
                ):
                    # Extensions append to the TOGGLE list
                    mod_state["_TOGGLE_%s" % attr_name[7:]].extend(attr_value)
                elif (
                    attr_name.startswith("ENABLED_")
                    and attr_name[8:].lower() in _plugin_categories
                ):
                    # Extensions override the ENABLED_ setting
                    mod_state[attr_name] = attr_value
            _resolve_relative_paths(ext_globals)
            for category in _plugin_categories:
                # Collect all the plugins present
                mod_state[_list_for_category(category)].extend(
                    _get_ext_plugins(ext_globals, category)
                )
    except Exception as e:
        _ext_debug("\tWARNING: ignoring all plugins due to error during import: %s" % e)
        print(
            "WARNING: Plugins did not load -- ignoring all of them which may not "
            "be what you want: %s" % e
        )
        traceback.print_exc()

    # Each _all_<category>s list now holds the (name, class_path) tuples of the core
    # plugins followed by those of the extensions. Later entries win when a name
    # appears more than once, which lets extensions override Metaflow core.
    for category in _plugin_categories:
        enabled_key = "ENABLED_%s" % category.upper()
        by_name = mod_state[_dict_for_category(category)] = {}
        for name, class_path in mod_state[_list_for_category(category)]:
            _ext_debug(" Adding %s '%s' from '%s'" % (category, name, class_path))
            by_name[name] = class_path

        # Settle ENABLED_*:
        #  - a non-None value was set explicitly (configuration file, command line
        #    or an extension); it is honored as is and the toggles are ignored.
        #  - None means nothing was configured: every known plugin is enabled and
        #    the TOGGLE_ entries are appended to produce the final list.
        # This supports both an explicit list of enabled plugins and a
        # "configuration-less" mode where installing an extension activates it.
        if mod_state[enabled_key] is None:
            mod_state[enabled_key] = (
                list(by_name) + mod_state["_TOGGLE_%s" % category.upper()]
            )
def merge_lists(base, overrides, attr):
    """
    Merge `overrides` into `base` in place, comparing items by their `attr`
    attribute.

    Items of `overrides` take precedence: an item of `base` is kept only when no
    item of `overrides` has the same value for `attr`. The resulting `base` lists
    the overrides first, followed by the surviving items of `base`.
    """
    merged = list(overrides)
    overridden = {getattr(item, attr) for item in overrides}
    for item in base:
        if getattr(item, attr) not in overridden:
            merged.append(item)
    base[:] = merged
def get_plugin(category, class_path, name):
    """
    Import the class found at `class_path` and check that it is the plugin `name`.

    The class is also stored in this module's globals under its class name.

    Raises
    ------
    ValueError
        If the module cannot be imported, the class is missing, or the name
        extracted from the class is non-empty and differs from `name`.
    """
    module_path, cls_name = class_path.rsplit(".", 1)
    try:
        plugin_module = importlib.import_module(module_path)
    except ImportError as e:
        raise ValueError(
            "Cannot locate %s plugin '%s' at '%s'" % (category, name, module_path)
        ) from e

    plugin_cls = getattr(plugin_module, cls_name, None)
    if plugin_cls is None:
        raise ValueError(
            "Cannot locate '%s' class for %s plugin at '%s'"
            % (cls_name, category, module_path)
        )

    declared_name = get_plugin_name(category, plugin_cls)
    if declared_name and declared_name != name:
        raise ValueError(
            "Class '%s' at '%s' for %s plugin expected to be named '%s' but got '%s'"
            % (cls_name, module_path, category, name, declared_name)
        )

    globals()[cls_name] = plugin_cls
    _ext_debug(" Added %s plugin '%s' from '%s'" % (category, name, class_path))
    return plugin_cls
def resolve_plugins(category, path_only=False):
    """
    Return the enabled plugins for `category`.

    The selection comes from the ENABLED_<CATEGORY> variable set by
    process_plugins. Its entries are processed in order: "-name" removes a plugin
    from the selection, "+name" and "name" add it.

    Returns
    -------
    dict or list
        - with `path_only`, a dictionary mapping plugin names to class paths;
        - for categories without a name extractor (sidecars for example), a
          dictionary mapping plugin names to classes;
        - otherwise a list of classes.

    Raises
    ------
    ValueError
        If a selected plugin is not available (errors from get_plugin propagate).
    """
    enabled_entries = globals()["ENABLED_%s" % category.upper()]
    _ext_debug(" Resolving %s plugins" % category)
    _ext_debug(" Raw list of plugins is: %s" % str(enabled_entries))

    selected = set()
    for entry in enabled_entries:
        if entry.startswith("-"):
            selected.discard(entry[1:])
            continue
        selected.add(entry[1:] if entry.startswith("+") else entry)

    available_plugins = globals()[_dict_for_category(category)]
    name_extractor = _plugin_categories[category]
    # Without a name extractor the plugin has no field holding its name, so the
    # name from the dictionary is used as the key of the result.
    to_return = {} if (path_only or not name_extractor) else []
    _ext_debug(" Resolved list of plugins is: %s" % str(selected))

    # Convert each class path to the actual class (unless only paths are wanted),
    # giving a useful message when the plugin does not exist.
    for name in selected:
        class_path = available_plugins.get(name)
        if class_path is None:
            raise ValueError(
                "Configuration requested %s plugin '%s' but no such plugin is available"
                % (category, name)
            )
        if path_only:
            to_return[name] = class_path
        elif name_extractor is not None:
            to_return.append(get_plugin(category, class_path, name))
        else:
            to_return[name] = get_plugin(category, class_path, name)
    return to_return
# Some plugins do not have a field in them indicating their name.
# This is the case for sidecars.
# All other plugins contain a field that indicates their name.
# _plugin_categories contains all the types of plugins and, for ones that have
# a field indicating their name,
# an additional function indicating how to extract the name of the plugin is provided.
# key is the type of plugin
# value is either:
# - a function to extract the name of the plugin from the plugin itself
# - None if this is a plugin with no field for its name
_plugin_categories = {
"step_decorator": lambda x: x.name,
"flow_decorator": lambda x: x.name,
"environment": lambda x: x.TYPE,
"metadata_provider": lambda x: x.TYPE,
"datastore": lambda x: x.TYPE,
"dataclient": lambda x: x.TYPE,
"secrets_provider": lambda x: x.TYPE,
"gcp_client_provider": lambda x: x.name,
"deployer_impl_provider": lambda x: x.TYPE,
"azure_client_provider": lambda x: x.name,
"sidecar": None,
"logging_sidecar": None,
"monitor_sidecar": None,
"aws_client_provider": lambda x: x.name,
"cli": lambda x: (
list(x.commands)[0] if len(x.commands) == 1 else "too many commands"
),
"runner_cli": lambda x: x.name,
"tl_plugin": None,
}
def get_plugin_name(category, plugin):
    # Name of `plugin` as given by the extractor registered for `category`; None for
    # categories that have no extractor.
    name_of = _plugin_categories[category]
    if not name_of:
        return None
    return name_of(plugin)
def _list_for_category(category):
# Convenience function to name the variable containing List[Tuple[str, str]] where
# each tuple contains:
# - the name of the plugin
# - the classpath of the plugin
return "_all_%ss" % category
def _dict_for_category(category):
# Convenience function to name the variable containing the same thing as
# _list_for_category except that it is now in dict form where the key is the name
# of the plugin
return "_all_%ss_dict" % category
def _get_ext_plugins(module_globals, category):
# Convenience function to get the list of Tuple[str, str] describing the plugins
# available from the extension. This defaults to [] so not all plugins need to be
# listed.
return module_globals.get("%sS_DESC" % category.upper(), [])
def _set_ext_plugins(module_globals, category, val):
module_globals["%sS_DESC" % category.upper()] = val
def _resolve_relative_paths(module_globals):
# We want to modify all the relevant lists so that the relative paths
# are made fully qualified paths for the modules
pkg_path = module_globals["__package__"]
pkg_components = pkg_path.split(".")
def resolve_path(class_path):
# Converts a relative class_path to an absolute one considering that the
# relative class_path is present in a package pkg_path
if class_path[0] == ".":
i = 1
# Check for multiple "." at the start of the class_path
while class_path[i] == ".":
i += 1
if i > len(pkg_components):
raise ValueError(
"Path '%s' exits out of Metaflow module at %s"
% (class_path, pkg_path)
)
return (
".".join(pkg_components[: -i + 1] if i > 1 else pkg_components)
+ class_path[i - 1 :]
)
return class_path
for plugin_category in _plugin_categories:
_set_ext_plugins(
module_globals,
plugin_category,
list(
map(
lambda p: (p[0], resolve_path(p[1])),
_get_ext_plugins(module_globals, plugin_category),
)
),
)
================================================
FILE: metaflow/flowspec.py
================================================
import inspect
import os
import sys
import traceback
import reprlib
from collections.abc import MutableMapping
from enum import Enum
from itertools import islice
from types import FunctionType, MethodType
from typing import Any, Callable, List, Optional, Tuple
from . import cmd_with_io, parameters
from .debug import debug
from .parameters import DelayedEvaluationParameter, Parameter
from .exception import (
MetaflowException,
MissingInMergeArtifactsException,
MetaflowInternalError,
UnhandledInMergeArtifactsException,
)
from .extension_support import extension_info
from .graph import FlowGraph
from .unbounded_foreach import UnboundedForeachInput
from .user_configs.config_parameters import ConfigValue
from .user_decorators.mutable_flow import MutableFlow
from .user_decorators.mutable_step import MutableStep
from .user_decorators.user_flow_decorator import FlowMutator
from .user_decorators.user_step_decorator import StepMutator
from .util import to_pod
from .metaflow_config import INCLUDE_FOREACH_STACK, MAXIMUM_FOREACH_VALUE_CHARS
# For Python 3 compatibility
try:
basestring
except NameError:
basestring = str
from .datastore.inputs import Inputs
# Names of the artifacts listed as internal
INTERNAL_ARTIFACTS_SET = {
    "_foreach_values",
    "_unbounded_foreach",
    "_control_mapper_tasks",
    "_control_task_is_mapper_zero",
    "_parallel_ubf_iter",
}
class InvalidNextException(MetaflowException):
    """Raised when an invalid self.next() transition is detected."""

    headline = "Invalid self.next() transition detected"

    def __init__(self, msg):
        # NOTE: this assumes that InvalidNextException is only raised at the top
        # level of next(): the line number reported is taken from the stack entry
        # two levels above this constructor.
        stack_entry = traceback.extract_stack()[-3]
        line_no = stack_entry[1]
        super().__init__(msg, line_no)
class ParallelUBF(UnboundedForeachInput):
    """
    Unbounded-for-each placeholder for supporting parallel (multi-node) steps.
    """

    def __init__(self, num_parallel):
        self.num_parallel = num_parallel

    def __getitem__(self, item):
        # item is None for the control task, which is also split 0
        if item:
            return item
        return 0
# First two items are inherited from parent classes; last three are not
class FlowStateItems(Enum):
FLOW_MUTATORS = 1
FLOW_DECORATORS = 2
CONFIGS = 3
CACHED_PARAMETERS = 4
SET_CONFIG_PARAMETERS = 5 # Parameters that now have a ConfigValue (converted)
class _FlowState(MutableMapping):
# Dict like structure to hold state information about the flow but it holds
# the key/values in two sub dictionaries: the ones that are specific to the flow
# and the ones that are inherited from parent classes.
# This is NOT a general purpose class and is meant to only work with FlowSpec.
# For example, it assumes that items are only list, dicts or None and assumes that
# self._self_data has all keys properly initialized.
_non_inherited_items = [
FlowStateItems.CONFIGS,
FlowStateItems.CACHED_PARAMETERS,
FlowStateItems.SET_CONFIG_PARAMETERS,
]
def __init__(self, *args, **kwargs):
self._self_data = dict(*args, **kwargs)
self._merged_data = {}
self._inherited = {}
def __getitem__(self, key):
if key in self._non_inherited_items:
return self._self_data[key]
if key in self._merged_data:
return self._merged_data[key]
# We haven't accessed this yet so compute it for the first time
self_value = self._self_data.get(key)
inherited_value = self._inherited.get(key)
if self_value is not None:
# ORDER IS IMPORTANT: we use inherited first and extend by whatever is in
# the flowspec
self._merged_data[key] = self._merge_value(inherited_value, self_value)
return self._merged_data[key]
raise KeyError(key)
def __setitem__(self, key, value):
self._self_data[key] = value
def __delitem__(self, key):
if key in self._non_inherited_items:
del self._self_data[key]
del self._merged_data[key]
def __iter__(self):
# All keys are in self._self_data
for key in self._self_data:
yield self[key]
def __len__(self):
return len(self._self_data)
@property
def self_data(self):
self._merged_data.clear()
return self._self_data
@property
def inherited_data(self):
return self._inherited
def _merge_value(self, inherited_value, self_value):
if self_value is None:
return None
inherited_value = inherited_value or type(self_value)()
if isinstance(self_value, dict):
return {**inherited_value, **self_value}
elif isinstance(self_value, list):
return inherited_value + self_value
raise RuntimeError(
f"Cannot merge values of type {type(inherited_value)} and {type(self_value)} -- "
"please report this as a bug"
)
class FlowSpecMeta(type):
    """
    Metaclass of FlowSpec: gives every class (other than one named "FlowSpec") its
    own flow state, collects what it inherits from its parent flows and builds its
    graph.
    """

    def __init__(cls, name, bases, attrs):
        super().__init__(name, bases, attrs)
        if name != "FlowSpec":
            cls._init_attrs()

    def _init_attrs(cls):
        from .decorators import (
            DuplicateFlowDecoratorException,
        )  # Prevent circular import

        # Some state is stored in the flow class itself, primarily to attach global
        # state to a flow. It is *not* an actual global because of Runner/NBRunner.
        # It is created here, in the metaclass, so that it is not shared between
        # different children classes.
        # Keys are FlowStateItems enum values
        cls._flow_state = _FlowState(
            {
                FlowStateItems.FLOW_MUTATORS: [],
                FlowStateItems.FLOW_DECORATORS: {},
                FlowStateItems.CONFIGS: {},
                FlowStateItems.CACHED_PARAMETERS: None,
                FlowStateItems.SET_CONFIG_PARAMETERS: [],
            }
        )

        # Tracks whether configs have been processed. With the Runner/Deployer,
        # calling multiple APIs on the same flow could otherwise process the configs
        # several times; for a given flow they are processed only once.
        cls._configs_processed = False

        # Things are also inherited from the parent classes, following the MRO:
        #  - decorators cause an error when the same one is seen several times and
        #    it does not support multiple instances
        #  - config decorators are joined
        #  - configs are added later directly by the class; base class configs are
        #    plain class attributes and are therefore inherited naturally.
        # Only the base classes need this treatment since the current class gets
        # updated as its decorators are parsed.
        # Care is needed to not duplicate things. With
        #     class A(FlowSpec): pass
        #     class B(A): pass
        #     class C(B): pass
        # C inherits from both B and A but whatever comes from A must be taken into
        # account only ONCE. To do this, only the self data of each class is
        # propagated.
        inherited = cls._flow_state.inherited_data
        for base in cls.__mro__:
            if base == cls or base == FlowSpec or not issubclass(base, FlowSpec):
                continue

            base_self_data = base._flow_state.self_data

            # Take care of decorators
            base_decorators = base_self_data[FlowStateItems.FLOW_DECORATORS]
            inherited_decorators = inherited.setdefault(
                FlowStateItems.FLOW_DECORATORS, {}
            )
            for deco_name, decos in base_decorators.items():
                if not decos:
                    continue
                allow_multiple = decos[0].allow_multiple
                if deco_name in inherited_decorators and not allow_multiple:
                    raise DuplicateFlowDecoratorException(deco_name)
                inherited_decorators.setdefault(deco_name, []).extend(decos)

            # Take care of flow mutators -- configs are just objects in the class
            # so they are naturally inherited and need nothing special.
            base_mutators = base_self_data[FlowStateItems.FLOW_MUTATORS]
            if base_mutators:
                inherited.setdefault(FlowStateItems.FLOW_MUTATORS, []).extend(
                    base_mutators
                )

        cls._init_graph()

    def _init_graph(cls):
        # Graph and steps are specific to the class -- they are stored here so they
        # can be accessed in the class method _process_config_decorators
        graph = FlowGraph(cls)
        cls._graph = graph
        cls._steps = [getattr(cls, node.name) for node in graph]
class FlowSpec(metaclass=FlowSpecMeta):
"""
Main class from which all Flows should inherit.
Attributes
----------
index
input
"""
# Attributes that are not saved in the datastore when checkpointing.
# Name starting with '__', methods, functions and Parameters do not need
# to be listed.
_EPHEMERAL = {
"_EPHEMERAL",
"_NON_PARAMETERS",
"_datastore",
"_cached_input",
"_graph",
"_flow_state",
"_steps",
"index",
"input",
}
# When checking for parameters, we look at dir(self) but we want to exclude
# attributes that are definitely not parameters and may be expensive to
# compute (like anything related to the `foreach_stack`). We don't need to exclude
# names starting with `_` as those are already excluded from `_get_parameters`.
_NON_PARAMETERS = {"cmd", "foreach_stack", "index", "input", "script_name", "name"}
def __init__(self, use_cli=True):
    """
    Construct a FlowSpec

    Parameters
    ----------
    use_cli : bool, default True
        Set to True if the flow is invoked from __main__ or the command line
    """
    self.name = self.__class__.__name__
    self._datastore = None
    self._transition = None
    self._cached_input = {}

    if not use_cli:
        return

    # The command line interface is run within the flow context of this class
    with parameters.flow_context(self.__class__):
        from . import cli

        cli.main(self)
@property
def script_name(self) -> str:
    """
    [Legacy function - do not use. Use `current` instead]

    Returns the name of the script containing the flow

    Returns
    -------
    str
        A string containing the name of the script
    """
    source_file = inspect.getfile(self.__class__)
    # A ".pyc" file is reported under its ".py" name
    if source_file.endswith(".pyc"):
        source_file = source_file[:-1]
    return os.path.basename(source_file)
@property
def _flow_decorators(self):
    # Backward compatible way to access the flow decorators held in the flow state
    flow_state = self._flow_state
    return flow_state[FlowStateItems.FLOW_DECORATORS]
@property
def _flow_mutators(self):
    # Flow mutators held in the flow state
    flow_state = self._flow_state
    return flow_state[FlowStateItems.FLOW_MUTATORS]
@classmethod
def _check_parameters(cls, config_parameters=False):
    """
    Check that no two parameters share the same name, ignoring case.

    Only the parameters whose IS_CONFIG_PARAMETER equals `config_parameters` are
    considered.

    Raises
    ------
    MetaflowException
        If a parameter name is used more than once.
    """
    names_seen = set()
    for _, param in cls._get_parameters():
        if param.IS_CONFIG_PARAMETER == config_parameters:
            lowered = param.name.lower()
            if lowered in names_seen:
                raise MetaflowException(
                    "Parameter *%s* is specified twice. "
                    "Note that parameter names are "
                    "case-insensitive." % param.name
                )
            names_seen.add(lowered)
@classmethod
def _process_config_decorators(cls, config_options, process_configs=True):
    """
    Resolve the configuration values of the flow and run the pre-mutate hooks of
    its flow-level and step-level mutators.

    Returns
    -------
    The flow class when mutators were evaluated; None when this was already done
    for the class or when the fast path (nothing to process) is taken.

    Raises
    ------
    MetaflowInternalError
        If an unexpected object is found among the flow or step mutators, or a
        FlowMutator is registered on an unrelated flow class.
    """
    if cls._configs_processed:
        debug.userconf_exec("Mutating step/flow decorators already processed")
        return None
    cls._configs_processed = True

    # Fast path for no user configurations
    if not process_configs or not (
        cls._flow_state[FlowStateItems.FLOW_MUTATORS]
        or any(len(step.config_decorators) != 0 for step in cls._steps)
    ):
        # Process parameters to allow them to also use config values easily
        for _, param in cls._get_parameters():
            if isinstance(param, ConfigValue) or param.IS_CONFIG_PARAMETER:
                continue
            param.init(not process_configs)
        return None

    debug.userconf_exec("Processing mutating step/flow decorators")
    # All the user configurations are converted from DelayedEvaluationParameters to
    # actual values so they can be used as is in the mutators. _get_parameters must
    # keep working properly though, so what was a config and has been set to a
    # specific value is recorded. This is safe for now because the other uses of
    # _get_parameters typically rely on the parameter, not on the variable itself.
    resolved_configs = []
    cls._check_parameters(config_parameters=True)
    for var, param in cls._get_parameters():
        if not param.IS_CONFIG_PARAMETER:
            continue
        # Note that a config with no default and not required will be None
        option_key = param.name.replace("-", "_").lower()
        val = config_options.get(option_key)
        if isinstance(val, DelayedEvaluationParameter):
            val = val()
        # The value is stored as well so that _set_constants does not try to
        # recompute it (there is no guarantee that it is stable)
        param._store_value(val)
        resolved_configs.append((var, param))
        debug.userconf_exec("Setting config %s to %s" % (var, str(val)))
        setattr(cls, var, val)

    cls._flow_state[FlowStateItems.SET_CONFIG_PARAMETERS] = resolved_configs

    # Run all the decorators: the flow-level ones first and then the step-level
    # ones, to stay consistent with the order in which other decorators are run.
    for mutator in cls._flow_state[FlowStateItems.FLOW_MUTATORS]:
        if not isinstance(mutator, FlowMutator):
            raise MetaflowInternalError(
                "A non FlowMutator found in flow custom decorators"
            )
        inserted_by_value = [mutator.decorator_name] + (mutator.inserted_by or [])
        mutable_flow = MutableFlow(
            cls,
            pre_mutate=True,
            statically_defined=mutator.statically_defined,
            inserted_by=inserted_by_value,
        )
        # Sanity check to make sure the decorator is applied to the right class
        if not mutator._flow_cls == cls and not issubclass(cls, mutator._flow_cls):
            raise MetaflowInternalError(
                "FlowMutator registered on the wrong flow -- "
                "expected %s but got %s"
                % (mutator._flow_cls.__name__, cls.__name__)
            )
        debug.userconf_exec(
            "Evaluating flow level decorator %s (pre-mutate)"
            % mutator.__class__.__name__
        )
        mutator.pre_mutate(mutable_flow)

    for step in cls._steps:
        for mutator in step.config_decorators:
            if not isinstance(mutator, StepMutator):
                raise MetaflowInternalError(
                    "A non StepMutator found in step custom decorators"
                )
            inserted_by_value = [mutator.decorator_name] + (
                mutator.inserted_by or []
            )
            debug.userconf_exec(
                "Evaluating step level decorator %s for %s (pre-mutate)"
                % (mutator.__class__.__name__, step.name)
            )
            mutable_step = MutableStep(
                cls,
                step,
                pre_mutate=True,
                statically_defined=mutator.statically_defined,
                inserted_by=inserted_by_value,
            )
            mutator.pre_mutate(mutable_step)

    # Process parameters to allow them to also use config values easily
    for _, param in cls._get_parameters():
        if not param.IS_CONFIG_PARAMETER:
            param.init()

    # Set the current flow class we are in (the one we just created)
    parameters.replace_flow_context(cls)

    # Re-calculate class level attributes after modifying the class
    cls._init_graph()
    return cls
def _set_constants(self, graph, kwargs, config_options):
    """
    Bind parameter values and class-level constants to this flow instance and
    store a description of the flow in `self._graph_info`.

    Parameters
    ----------
    graph : object
        Graph of the flow; its `output_steps()` result and `doc` attribute are
        recorded in `_graph_info`.
    kwargs : Dict[str, Any]
        Values of the regular parameters, keyed by the parameter name
        lower-cased and with dashes replaced by underscores.
    config_options : Dict[str, Any]
        Values of the config parameters, consulted when a config parameter has
        no truthy computed value.
    """
    # Imported locally to prevent a circular dependency
    from metaflow.decorators import flow_decorators

    # Parameters and other constants (class level variables) are persisted only
    # once. This method runs before persist_constants, which persists every
    # value assigned here through setattr.
    self._check_parameters(config_parameters=False)

    self._success = True
    handled_names = set()
    parameters_info = []
    for attr_name, parameter in self._get_parameters():
        handled_names.add(attr_name)
        if parameter.IS_CONFIG_PARAMETER:
            # Prefer the already computed value, otherwise use config_options
            value = parameter._computed_value or config_options.get(parameter.name)
        else:
            kwarg_key = parameter.name.replace("-", "_").lower()
            value = kwargs[kwarg_key]
        # Delayed-evaluation parameters are resolved by calling them
        if isinstance(value, DelayedEvaluationParameter):
            value = value()
        if value and parameter.separator:
            value = value.split(parameter.separator)
        if isinstance(value, ConfigValue):
            # Config values are stored as plain dicts so that older metaflow
            # clients can read them; it also makes them easier to access.
            value = value.to_dict()
        setattr(self, attr_name, value)
        parameters_info.append(
            {"name": attr_name, "type": parameter.__class__.__name__}
        )

    # Class variables get the same treatment: they are forced constant because
    # modifications to them don't propagate well (a new process is created for
    # each step and the flow file is re-read).
    skipped_kinds = (MethodType, FunctionType, property, type)
    constants_info = []
    for attr_name in dir(self.__class__):
        is_excluded = (
            attr_name[0] == "_"
            or attr_name in self._NON_PARAMETERS
            or attr_name in handled_names
        )
        if is_excluded:
            continue
        value = getattr(self.__class__, attr_name)
        if isinstance(value, skipped_kinds):
            continue
        constants_info.append({"name": attr_name, "type": type(value).__name__})
        setattr(self, attr_name, value)

    # The DAG information is stored as an artifact called _graph_info
    steps_info, graph_structure = graph.output_steps()

    decorators_info = []
    for deco in flow_decorators(self):
        if deco.name.startswith("_"):
            continue
        decorators_info.append(
            {
                "name": deco.name,
                "attributes": to_pod(deco.attributes),
                "statically_defined": deco.statically_defined,
                "inserted_by": deco.inserted_by,
            }
        )
    for mutator in self._flow_state[FlowStateItems.FLOW_MUTATORS]:
        decorators_info.append(
            {
                "name": mutator.__class__.__name__,
                "attributes": {},
                "statically_defined": mutator.statically_defined,
                "inserted_by": mutator.inserted_by,
            }
        )

    self._graph_info = {
        "file": os.path.basename(os.path.abspath(sys.argv[0])),
        "parameters": parameters_info,
        "constants": constants_info,
        "steps": steps_info,
        "graph_structure": graph_structure,
        "doc": graph.doc,
        "decorators": decorators_info,
        "extensions": extension_info(),
    }
@classmethod
def _get_parameters(cls):
    """
    Yield `(attribute name, parameter)` pairs for this flow class.

    Entries registered under `FlowStateItems.SET_CONFIG_PARAMETERS` are yielded
    first. The remaining parameters come from the cached list of attribute names
    when one exists; otherwise the class attributes are scanned for `Parameter`
    instances and, once the scan has been fully consumed, the names found are
    stored under `FlowStateItems.CACHED_PARAMETERS`.
    """
    cached_names = cls._flow_state[FlowStateItems.CACHED_PARAMETERS]
    emitted = set()

    # Both the cached and the uncached path start with the explicitly set
    # config parameters.
    for entry in cls._flow_state[FlowStateItems.SET_CONFIG_PARAMETERS]:
        emitted.add(entry[0])
        yield entry[0], entry[1]

    if cached_names is not None:
        for attr_name in cached_names:
            if attr_name in emitted:
                continue
            yield attr_name, getattr(cls, attr_name)
        return

    discovered = []
    for attr_name in dir(cls):
        if attr_name[0] == "_" or attr_name in cls._NON_PARAMETERS:
            continue
        try:
            candidate = getattr(cls, attr_name)
        except:
            # Attributes that cannot be read are skipped
            continue
        if isinstance(candidate, Parameter) and attr_name not in emitted:
            discovered.append(attr_name)
            yield attr_name, candidate
    cls._flow_state[FlowStateItems.CACHED_PARAMETERS] = discovered
def _set_datastore(self, datastore):
self._datastore = datastore
def __iter__(self):
"""
[Legacy function - do not use]
Iterate over all steps in the Flow
Returns
-------
Iterator[graph.DAGNode]
Iterator over the steps in the flow
"""
return iter(self._steps)
def __getattr__(self, name: str):
if self._datastore and name in self._datastore:
# load the attribute from the datastore...
x = self._datastore[name]
# ...and cache it in the object for faster access
setattr(self, name, x)
return x
else:
raise AttributeError("Flow %s has no attribute '%s'" % (self.name, name))
def cmd(self, cmdline, input=None, output=None):
    """
    [Legacy function - do not use]

    Forwards to `cmd_with_io.cmd`.

    Parameters
    ----------
    cmdline : Any
        Passed through unchanged.
    input : optional, default None
        Passed as `input`; a fresh empty dict is used when omitted.
    output : optional, default None
        Passed as `output`; a fresh empty list is used when omitted.
    """
    # Mutable default arguments are created once and shared by every call, so
    # fresh containers are built per call instead.
    return cmd_with_io.cmd(
        cmdline,
        input={} if input is None else input,
        output=[] if output is None else output,
    )
@property
def index(self) -> Optional[int]:
    """
    The index of this foreach branch.

    In a foreach step, multiple instances of this step (tasks) will be executed,
    one for each element in the foreach. This property returns the zero based index
    of the current task. If this is not a foreach step, this returns None.

    If you need to know the indices of the parent tasks in a nested foreach, use
    `FlowSpec.foreach_stack`.

    Returns
    -------
    int, optional
        Index of the task in a foreach step.
    """
    stack = self._foreach_stack
    if not stack:
        return None
    # The innermost foreach is the last frame on the stack
    return stack[-1].index
@property
def input(self) -> Optional[Any]:
    """
    The value of the foreach artifact in this foreach branch.

    In a foreach step, multiple instances of this step (tasks) will be executed,
    one for each element in the foreach. This property returns the element passed
    to the current task. If this is not a foreach step, this returns None.

    If you need to know the values of the parent tasks in a nested foreach, use
    `FlowSpec.foreach_stack`.

    Returns
    -------
    object, optional
        Input passed to the foreach task.
    """
    # With no explicit stack index, _find_input resolves the innermost foreach
    current_value = self._find_input()
    return current_value
def foreach_stack(self) -> Optional[List[Tuple[int, int, Any]]]:
"""
Returns the current stack of foreach indexes and values for the current step.
Use this information to understand what data is being processed in the current
foreach branch. For example, considering the following code:
```
@step
def root(self):
self.split_1 = ['a', 'b', 'c']
self.next(self.nest_1, foreach='split_1')
@step
def nest_1(self):
self.split_2 = ['d', 'e', 'f', 'g']
self.next(self.nest_2, foreach='split_2'):
@step
def nest_2(self):
foo = self.foreach_stack()
```
`foo` will take the following values in the various tasks for nest_2:
```
[(0, 3, 'a'), (0, 4, 'd')]
[(0, 3, 'a'), (1, 4, 'e')]
...
[(0, 3, 'a'), (3, 4, 'g')]
[(1, 3, 'b'), (0, 4, 'd')]
...
```
where each tuple corresponds to:
- The index of the task for that level of the loop.
- The number of splits for that level of the loop.
- The value for that level of the loop.
Note that the last tuple returned in a task corresponds to:
- 1st element: value returned by `self.index`.
- 3rd element: value returned by `self.input`.
Returns
-------
List[Tuple[int, int, Any]]
An array describing the current stack of foreach steps.
"""
return [
(frame.index, frame.num_splits, self._find_input(stack_index=i))
for i, frame in enumerate(self._foreach_stack)
]
def _find_input(self, stack_index=None):
if stack_index is None:
stack_index = len(self._foreach_stack) - 1
if stack_index in self._cached_input:
return self._cached_input[stack_index]
elif self._foreach_stack:
# NOTE this is obviously an O(n) operation which also requires
# downloading the whole input data object in order to find the
# right split. One can override this method with a more efficient
# input data handler if this is a problem.
frame = self._foreach_stack[stack_index]
try:
var = getattr(self, frame.var)
except AttributeError:
# this is where AttributeError happens:
# [ foreach x ]
# [ foreach y ]
# [ inner ]
# [ join y ] <- call self.foreach_stack here,
# self.x is not available
self._cached_input[stack_index] = None
else:
try:
self._cached_input[stack_index] = var[frame.index]
except TypeError:
# __getitem__ not supported, fall back to an iterator
self._cached_input[stack_index] = next(
islice(var, frame.index, frame.index + 1)
)
return self._cached_input[stack_index]
def merge_artifacts(
    self,
    inputs: Inputs,
    exclude: Optional[List[str]] = None,
    include: Optional[List[str]] = None,
) -> None:
    """
    Helper function for merging artifacts in a join step.

    This function takes all the artifacts coming from the branches of a
    join point and assigns them to self in the calling step. Only artifacts
    not set in the current step are considered. If, for a given artifact, different
    values are present on the incoming edges, an error will be thrown and the artifacts
    that conflict will be reported.

    As a few examples, in the simple graph: A splitting into B and C and joining in D:
    ```
    A:
      self.x = 5
      self.y = 6
    B:
      self.b_var = 1
      self.x = from_b
    C:
      self.x = from_c

    D:
      merge_artifacts(inputs)
    ```
    In D, the following artifacts are set:
      - `y` (value: 6), `b_var` (value: 1)
      - if `from_b` and `from_c` are the same, `x` will be accessible and have value `from_b`
      - if `from_b` and `from_c` are different, an error will be thrown. To prevent this error,
        you need to manually set `self.x` in D to a merged value (for example the max) prior to
        calling `merge_artifacts`.

    Parameters
    ----------
    inputs : Inputs
        Incoming steps to the join point.
    exclude : List[str], optional, default None
        If specified, do not consider merging artifacts with a name in `exclude`.
        Cannot specify if `include` is also specified.
    include : List[str], optional, default None
        If specified, only merge artifacts specified. Cannot specify if `exclude` is
        also specified.

    Raises
    ------
    MetaflowException
        This exception is thrown if this is not called in a join step, or if both
        `include` and `exclude` are specified.
    UnhandledInMergeArtifactsException
        This exception is thrown in case of unresolved conflicts.
    MissingInMergeArtifactsException
        This exception is thrown in case an artifact specified in `include` cannot
        be found.
    """
    include = include or []
    exclude = exclude or []
    node = self._graph[self._current_step]
    if node.type != "join":
        msg = (
            "merge_artifacts can only be called in a join and step *{step}* "
            "is not a join".format(step=self._current_step)
        )
        raise MetaflowException(msg)
    if len(exclude) > 0 and len(include) > 0:
        msg = "`exclude` and `include` are mutually exclusive in merge_artifacts"
        raise MetaflowException(msg)

    # Maps artifact name -> (first input providing it, sha of its value)
    to_merge = {}
    unresolved = []
    for inp in inputs:
        # available_vars is the list of variables from inp that should be considered
        if include:
            available_vars = (
                (var, sha)
                for var, sha in inp._datastore.items()
                if (var in include) and (not hasattr(self, var))
            )
        else:
            available_vars = (
                (var, sha)
                for var, sha in inp._datastore.items()
                if (var not in exclude)
                and (not hasattr(self, var))
                and (var not in INTERNAL_ARTIFACTS_SET)
            )
        for var, sha in available_vars:
            _, previous_sha = to_merge.setdefault(var, (inp, sha))
            # Report each conflicting artifact once, even when more than two
            # branches disagree with the first value seen.
            if previous_sha != sha and var not in unresolved:
                unresolved.append(var)

    # Check if everything in include is present in to_merge
    missing = []
    for v in include:
        if v not in to_merge and not hasattr(self, v):
            missing.append(v)

    if unresolved:
        # We have unresolved conflicts, so we do not set anything and error out
        msg = (
            "Step *{step}* cannot merge the following artifacts due to them "
            "having conflicting values:\n[{artifacts}].\nTo remedy this issue, "
            "be sure to explicitly set those artifacts (using "
            "self.<artifact_name> = ...) prior to calling merge_artifacts.".format(
                step=self._current_step, artifacts=", ".join(unresolved)
            )
        )
        raise UnhandledInMergeArtifactsException(msg, unresolved)
    if missing:
        msg = (
            "Step *{step}* specifies that [{include}] should be merged but "
            "[{missing}] are not present.\nTo remedy this issue, make sure "
            "that the values specified in `include` come from at least one "
            "branch".format(
                step=self._current_step,
                include=", ".join(include),
                missing=", ".join(missing),
            )
        )
        raise MissingInMergeArtifactsException(msg, missing)

    # If things are resolved, we pass down the variables from the input datastores
    for var, (inp, _) in to_merge.items():
        self._datastore.passdown_partial(inp._datastore, [var])
def _validate_ubf_step(self, step_name):
join_list = self._graph[step_name].out_funcs
if len(join_list) != 1:
msg = (
"UnboundedForeach is supported only over a single node, "
"not an arbitrary DAG. Specify a single `join` node"
" instead of multiple:{join_list}.".format(join_list=join_list)
)
raise InvalidNextException(msg)
join_step = join_list[0]
join_node = self._graph[join_step]
join_type = join_node.type
if join_type != "join":
msg = (
"UnboundedForeach found for:{node} -> {join}."
" The join type isn't valid.".format(node=step_name, join=join_step)
)
raise InvalidNextException(msg)
def _get_foreach_item_value(self, item: Any):
    """
    Get the string recorded for an item of the foreach iterator.

    Strings, ints, floats and bools are used as they are; any other item is
    replaced by its abbreviated `reprlib` representation. The result is
    converted to a string and cut to `MAXIMUM_FOREACH_VALUE_CHARS` characters.

    Parameters
    ----------
    item : Any
        The item to get the value from.

    Returns
    -------
    str
        The value to use for the item.
    """
    primitive_types = (basestring, int, float, bool)
    if isinstance(item, primitive_types):
        value = item
    else:
        value = reprlib.Repr().repr(item)
    text = basestring(value)
    return text[:MAXIMUM_FOREACH_VALUE_CHARS]
def next(self, *dsts: Callable[..., None], **kwargs) -> None:
"""
Indicates the next step to execute after this step has completed.
This statement should appear as the last statement of each step, except
the end step.
There are several valid formats to specify the next step:
- Straight-line connection: `self.next(self.next_step)` where `next_step` is a method in
the current class decorated with the `@step` decorator.
- Static fan-out connection: `self.next(self.step1, self.step2, ...)` where `stepX` are
methods in the current class decorated with the `@step` decorator.
- Foreach branch:
```
self.next(self.foreach_step, foreach='foreach_iterator')
```
In this situation, `foreach_step` is a method in the current class decorated with the
`@step` decorator and `foreach_iterator` is a variable name in the current class that
evaluates to an iterator. A task will be launched for each value in the iterator and
each task will execute the code specified by the step `foreach_step`.
- Switch statement:
```
self.next({"case1": self.step_a, "case2": self.step_b}, condition='condition_variable')
```
In this situation, `step_a` and `step_b` are methods in the current class decorated
with the `@step` decorator and `condition_variable` is a variable name in the current
class. The value of the condition variable determines which step to execute. If the
value doesn't match any of the dictionary keys, a RuntimeError is raised.
Parameters
----------
dsts : Callable[..., None]
One or more methods annotated with `@step`. When `condition` is given, a single
non-empty dictionary mapping condition values to such methods instead.
foreach : str, optional
Name of the attribute of `self` to iterate over. Requires exactly one destination.
num_parallel : int, optional
When set to a value >= 1, only one destination is allowed and the transition is
treated as a foreach over a `ParallelUBF(num_parallel)` object stored on `self`.
condition : str, optional
Name of the attribute of `self` whose value selects the destination from the
dictionary passed as the only positional argument. Cannot be combined with
`foreach` or `num_parallel`.
Raises
------
InvalidNextException
Raised if the format of the arguments does not match one of the ones given above.
RuntimeError
Raised if the value of the condition variable is not a key of the switch dictionary.
"""
step = self._current_step
# Pull out the supported keyword arguments; anything left over is an error.
foreach = kwargs.pop("foreach", None)
num_parallel = kwargs.pop("num_parallel", None)
condition = kwargs.pop("condition", None)
if kwargs:
kw = next(iter(kwargs))
msg = (
"Step *{step}* passes an unknown keyword argument "
"'{invalid}' to self.next().".format(step=step, invalid=kw)
)
raise InvalidNextException(msg)
# check: next() is called only once
if self._transition is not None:
msg = (
"Multiple self.next() calls detected in step *{step}*. "
"Call self.next() only once.".format(step=step)
)
raise InvalidNextException(msg)
# check: switch case using condition
if condition is not None:
if len(dsts) != 1 or not isinstance(dsts[0], dict) or not dsts[0]:
msg = (
"Step *{step}* has an invalid self.next() transition. "
"When using 'condition', the transition must be to a single, "
"non-empty dictionary mapping condition values to step methods.".format(
step=step
)
)
raise InvalidNextException(msg)
if not isinstance(condition, basestring):
msg = (
"Step *{step}* has an invalid self.next() transition. "
"The argument to 'condition' must be a string.".format(step=step)
)
raise InvalidNextException(msg)
if foreach is not None or num_parallel is not None:
msg = (
"Step *{step}* has an invalid self.next() transition. "
"Switch statements cannot be combined with foreach or num_parallel.".format(
step=step
)
)
raise InvalidNextException(msg)
switch_cases = dsts[0]
# Validate that condition variable exists
try:
condition_value = getattr(self, condition)
except AttributeError:
msg = (
"Condition variable *self.{var}* in step *{step}* "
"does not exist. Make sure you set self.{var} in this step.".format(
step=step, var=condition
)
)
raise InvalidNextException(msg)
if condition_value not in switch_cases:
available_cases = list(switch_cases.keys())
raise RuntimeError(
f"Switch condition variable '{condition}' has value '{condition_value}' "
f"which is not in the available cases: {available_cases}"
)
# Get the chosen step and set transition directly
chosen_step_func = switch_cases[condition_value]
# Validate that the chosen step exists
try:
name = chosen_step_func.__func__.__name__
except:
msg = (
"Step *{step}* specifies a switch transition that is not a function. "
"Make sure the value in the dictionary is a method "
"of the Flow class.".format(step=step)
)
raise InvalidNextException(msg)
if not hasattr(self, name):
msg = (
"Step *{step}* specifies a switch transition to an "
"unknown step, *{name}*.".format(step=step, name=name)
)
raise InvalidNextException(msg)
# A switch resolves to a single destination and carries no foreach variable.
self._transition = ([name], None)
return
# Check for an invalid transition: a dictionary used without a 'condition' parameter.
if len(dsts) == 1 and isinstance(dsts[0], dict):
msg = (
"Step *{step}* has an invalid self.next() transition. "
"Dictionary argument requires 'condition' parameter.".format(step=step)
)
raise InvalidNextException(msg)
# check: all destinations are methods of this object
funcs = []
for i, dst in enumerate(dsts):
try:
name = dst.__func__.__name__
except:
msg = (
"In step *{step}* the {arg}. argument in self.next() is "
"not a function. Make sure all arguments in self.next() "
"are methods of the Flow class.".format(step=step, arg=i + 1)
)
raise InvalidNextException(msg)
if not hasattr(self, name):
msg = (
"Step *{step}* specifies a self.next() transition to an "
"unknown step, *{name}*.".format(step=step, name=name)
)
raise InvalidNextException(msg)
funcs.append(name)
if num_parallel is not None and num_parallel >= 1:
if len(dsts) > 1:
raise InvalidNextException(
"Only one destination allowed when num_parallel used in self.next()"
)
# num_parallel is handled as a foreach over a ParallelUBF object stored on self.
foreach = "_parallel_ubf_iter"
self._parallel_ubf_iter = ParallelUBF(num_parallel)
# check: foreach is valid
if foreach:
if not isinstance(foreach, basestring):
msg = (
"Step *{step}* has an invalid self.next() transition. "
"The argument to 'foreach' must be a string.".format(step=step)
)
raise InvalidNextException(msg)
if len(dsts) != 1:
msg = (
"Step *{step}* has an invalid self.next() transition. "
"Specify exactly one target for 'foreach'.".format(step=step)
)
raise InvalidNextException(msg)
try:
foreach_iter = getattr(self, foreach)
except:
msg = (
"Foreach variable *self.{var}* in step *{step}* "
"does not exist. Check your variable.".format(
step=step, var=foreach
)
)
raise InvalidNextException(msg)
self._foreach_values = None
if issubclass(type(foreach_iter), UnboundedForeachInput):
# The number of splits is left as None for an unbounded foreach.
self._unbounded_foreach = True
self._foreach_num_splits = None
self._validate_ubf_step(funcs[0])
else:
try:
if INCLUDE_FOREACH_STACK:
# Collect a string value for every item; the split count is the
# number of values collected.
self._foreach_values = []
for item in foreach_iter:
value = self._get_foreach_item_value(item)
self._foreach_values.append(value)
self._foreach_num_splits = len(self._foreach_values)
else:
# Only the number of items is needed; iterate once to count them.
self._foreach_num_splits = sum(1 for _ in foreach_iter)
except Exception as e:
msg = (
"Foreach variable *self.{var}* in step *{step}* "
"is not iterable. Please check details: {err}".format(
step=step, var=foreach, err=str(e)
)
)
raise InvalidNextException(msg)
if self._foreach_num_splits == 0:
msg = (
"Foreach iterator over *{var}* in step *{step}* "
"produced zero splits. Check your variable.".format(
step=step, var=foreach
)
)
raise InvalidNextException(msg)
self._foreach_var = foreach
# check: non-keyword transitions are valid
if foreach is None and condition is None:
if len(dsts) < 1:
msg = (
"Step *{step}* has an invalid self.next() transition. "
"Specify at least one step function as an argument in "
"self.next().".format(step=step)
)
raise InvalidNextException(msg)
# Record the transition as (destination step names, foreach variable name or None).
self._transition = (funcs, foreach)
def __str__(self):
step_name = getattr(self, "_current_step", None)
if step_name:
index = ",".join(str(idx) for idx, _, _ in self.foreach_stack())
if index:
inp = self.input
if inp is None:
return "" % (self.name, step_name, index)
else:
inp = str(inp)
if len(inp) > 20:
inp = inp[:20] + "..."
return "" % (
self.name,
step_name,
index,
inp,
)
else:
return "" % (self.name, step_name)
else:
return "" % self.name
def __getstate__(self):
    """
    Always raises: flows cannot be serialized.

    Raises
    ------
    MetaflowException
        Raised unconditionally, with a hint on what to store instead.
    """
    explanation = (
        "Flows can't be serialized. Maybe you tried "
        "to assign *self* or one of the *inputs* "
        "to an attribute? Instead of serializing the "
        "whole flow, you should choose specific "
        "attributes, e.g. *input.some_var*, to be "
        "stored."
    )
    raise MetaflowException(explanation)
================================================
FILE: metaflow/graph.py
================================================
import inspect
import ast
import re
from itertools import chain
from .util import to_pod
def deindent_docstring(doc):
    """
    Remove the common leading indentation from a docstring.

    The indent to remove is taken from the first non-empty line when that line
    starts with spaces or tabs; otherwise it is taken from the second non-empty
    line. This covers text starting right after the opening quotes as well as
    text starting on the following line.

    Parameters
    ----------
    doc : str, optional
        The text to process.

    Returns
    -------
    str
        "" when `doc` is empty or None; `doc` unchanged when no indent was
        found; otherwise `doc` with the indent removed after every newline and
        surrounding whitespace stripped.
    """
    if not doc:
        return ""

    indent_match = None
    first_line_seen = False
    for text_line in doc.splitlines():
        if not text_line:
            continue
        indent_match = re.match("[\t ]+", text_line)
        # Stop at the first indented line, or at the second non-empty line
        # whether or not it is indented.
        if first_line_seen or indent_match is not None:
            break
        first_line_seen = True

    if indent_match is None:
        return doc
    return re.sub(r"\n" + indent_match.group(), "\n", doc).strip()
class DAGNode(object):
    """
    A step of a flow, described by statically analysing the step function.

    The constructor records the step's name, source location, decorators and
    docstring, then inspects the last statement of the function body to
    classify the `self.next(...)` transition (see `_parse`).
    """

    def __init__(
        self, func_ast, decos, wrappers, config_decorators, doc, source_file, lineno
    ):
        self.name = func_ast.name
        self.source_file = source_file
        # lineno is the start line of decorators in source_file
        # func_ast.lineno is lines from decorators start to def of function
        self.func_lineno = lineno + func_ast.lineno - 1
        self.decorators = decos
        self.wrappers = wrappers
        self.config_decorators = config_decorators
        self.doc = deindent_docstring(doc)
        self.parallel_step = any(getattr(deco, "IS_PARALLEL", False) for deco in decos)

        # these attributes are populated by _parse
        self.tail_next_lineno = 0
        self.type = None
        self.out_funcs = []
        self.has_tail_next = False
        self.invalid_tail_next = False
        self.num_args = 0
        self.switch_cases = {}
        self.condition = None
        self.foreach_param = None
        self.num_parallel = 0
        self.parallel_foreach = False
        self._parse(func_ast, lineno)

        # these attributes are populated by _traverse_graph
        self.in_funcs = set()
        self.split_parents = []
        self.split_branches = []
        self.matching_join = None
        # these attributes are populated by _postprocess
        self.is_inside_foreach = False

    def _expr_str(self, expr):
        """Render an attribute access such as `self.next` as "self.next"."""
        return "%s.%s" % (expr.value.id, expr.attr)

    @staticmethod
    def _constant_value(node):
        """
        Return the value of a literal AST node, or None for any other node.

        `ast.Constant.value` is read directly: the `.s` alias on `ast.Constant`
        is deprecated since Python 3.12 and removed in Python 3.14, where
        reading it through `getattr(node, "s", None)` silently yields None.
        """
        if isinstance(node, ast.Constant):
            return node.value
        # Legacy ast.Str nodes (parsers older than Python 3.8) expose `.s`
        return getattr(node, "s", None)

    def _parse_switch_dict(self, dict_node):
        """
        Extract `{case value: step name}` from the dictionary literal of a
        switch transition.

        Returns None when `dict_node` is not a dictionary literal, is empty, or
        contains a key or value that cannot be analysed statically.
        """
        switch_cases = {}

        if isinstance(dict_node, ast.Dict):
            for key, value in zip(dict_node.keys, dict_node.values):
                case_key = None

                # handle string literals
                if hasattr(ast, "Str") and isinstance(key, ast.Str):
                    case_key = key.s
                elif isinstance(key, ast.Constant):
                    case_key = key.value
                elif isinstance(key, ast.Attribute):
                    if isinstance(key.value, ast.Attribute) and isinstance(
                        key.value.value, ast.Name
                    ):
                        # This handles self.config.some_key
                        if key.value.value.id == "self":
                            config_var = key.value.attr
                            config_key = key.attr
                            case_key = f"config:{config_var}.{config_key}"
                        else:
                            return None
                    else:
                        return None

                # handle variables or other dynamic expressions - not allowed
                elif isinstance(key, ast.Name):
                    return None
                else:
                    # can't statically analyze this key
                    return None

                if case_key is None:
                    return None

                # extract the step name from the value
                if isinstance(value, ast.Attribute) and isinstance(
                    value.value, ast.Name
                ):
                    if value.value.id == "self":
                        step_name = value.attr
                        switch_cases[case_key] = step_name
                    else:
                        return None
                else:
                    return None

        return switch_cases if switch_cases else None

    def _parse(self, func_ast, lineno):
        """
        Classify the step from the last statement of its body.

        Sets `type`, `out_funcs`, `has_tail_next`, `invalid_tail_next`,
        `tail_next_lineno` and, depending on the transition, `foreach_param`,
        `num_parallel`, `parallel_foreach`, `condition` and `switch_cases`.
        `invalid_tail_next` stays True when a trailing `self.next(...)` exists
        but matches none of the recognised forms.
        """
        self.num_args = len(func_ast.args.args)
        tail = func_ast.body[-1]

        # end doesn't need a transition
        if self.name == "end":
            # TYPE: end
            self.type = "end"

        # ensure that the tail is an expression
        if not isinstance(tail, ast.Expr):
            return

        # determine the type of self.next transition
        try:
            if not self._expr_str(tail.value.func) == "self.next":
                return

            self.has_tail_next = True
            self.invalid_tail_next = True
            self.tail_next_lineno = lineno + tail.lineno - 1

            # Check if first argument is a dictionary (switch case)
            if (
                len(tail.value.args) == 1
                and isinstance(tail.value.args[0], ast.Dict)
                and any(k.arg == "condition" for k in tail.value.keywords)
            ):
                # This is a switch statement
                switch_cases = self._parse_switch_dict(tail.value.args[0])
                condition_name = None

                # Get condition parameter
                for keyword in tail.value.keywords:
                    if keyword.arg == "condition":
                        if hasattr(ast, "Str") and isinstance(keyword.value, ast.Str):
                            condition_name = keyword.value.s
                        elif isinstance(keyword.value, ast.Constant) and isinstance(
                            keyword.value.value, str
                        ):
                            condition_name = keyword.value.value
                        break

                if switch_cases and condition_name:
                    self.type = "split-switch"
                    self.condition = condition_name
                    self.switch_cases = switch_cases
                    self.out_funcs = list(switch_cases.values())
                    self.invalid_tail_next = False
                    return
            else:
                self.out_funcs = [e.attr for e in tail.value.args]

            # Literal keyword values are kept; anything else is recorded as None
            keywords = dict(
                (k.arg, self._constant_value(k.value)) for k in tail.value.keywords
            )
            if len(keywords) == 1:
                if "foreach" in keywords:
                    # TYPE: foreach
                    self.type = "foreach"
                    if len(self.out_funcs) == 1:
                        self.foreach_param = keywords["foreach"]
                        self.invalid_tail_next = False
                elif "num_parallel" in keywords:
                    self.type = "foreach"
                    self.parallel_foreach = True
                    if len(self.out_funcs) == 1:
                        self.num_parallel = keywords["num_parallel"]
                        self.invalid_tail_next = False
            elif len(keywords) == 0:
                if len(self.out_funcs) > 1:
                    # TYPE: split
                    self.type = "split"
                    self.invalid_tail_next = False
                elif len(self.out_funcs) == 1:
                    # TYPE: linear
                    if self.name == "start":
                        self.type = "start"
                    elif self.num_args > 1:
                        self.type = "join"
                    else:
                        self.type = "linear"
                    self.invalid_tail_next = False
        except AttributeError:
            # The tail does not have the shape of a self.next(...) call
            return

    def __str__(self):
        return """*[{0.name} {0.type} ({0.source_file} line {0.func_lineno})]*
in_funcs={in_funcs}
out_funcs={out_funcs}
split_parents={parents}
split_branches={branches}
matching_join={matching_join}
is_inside_foreach={is_inside_foreach}
decorators={decos}
num_args={0.num_args}
has_tail_next={0.has_tail_next} (line {0.tail_next_lineno})
invalid_tail_next={0.invalid_tail_next}
foreach_param={0.foreach_param}
condition={0.condition}
parallel_step={0.parallel_step}
parallel_foreach={0.parallel_foreach}
-> {out}""".format(
            self,
            matching_join=self.matching_join and "[%s]" % self.matching_join,
            is_inside_foreach=self.is_inside_foreach,
            out_funcs=", ".join("[%s]" % x for x in self.out_funcs),
            in_funcs=", ".join("[%s]" % x for x in self.in_funcs),
            parents=", ".join("[%s]" % x for x in self.split_parents),
            branches=", ".join("[%s]" % x for x in self.split_branches),
            decos=" | ".join(map(str, self.decorators)),
            out=", ".join("[%s]" % x for x in self.out_funcs),
        )
class FlowGraph(object):
    """
    The graph of steps of a flow class.

    Nodes are built from every attribute of the flow that is callable and has
    an `is_step` attribute. The graph is then traversed from "start" to fill in
    incoming edges, split parents and matching joins.
    """

    def __init__(self, flow):
        self.name = flow.__name__
        self.nodes = self._create_nodes(flow)
        self.doc = deindent_docstring(flow.__doc__)
        # nodes sorted in topological order.
        self.sorted_nodes = []
        self._traverse_graph()
        self._postprocess()

    def _create_nodes(self, flow):
        """Build a `{step name: DAGNode}` mapping from the steps of `flow`."""
        nodes = {}
        for element in dir(flow):
            func = getattr(flow, element)
            if callable(func) and hasattr(func, "is_step"):
                source_file = inspect.getsourcefile(func)
                source_lines, lineno = inspect.getsourcelines(func)
                # This also works for code (strips out leading whitespace based on
                # first line)
                source_code = deindent_docstring("".join(source_lines))
                function_ast = ast.parse(source_code).body[0]
                node = DAGNode(
                    function_ast,
                    func.decorators,
                    func.wrappers,
                    func.config_decorators,
                    func.__doc__,
                    source_file,
                    lineno,
                )
                nodes[element] = node
        return nodes

    def _postprocess(self):
        # any node who has a foreach as any of its split parents
        # has is_inside_foreach=True *unless* all of those `foreach`s
        # are joined by the node
        for node in self.nodes.values():
            foreaches = [
                p for p in node.split_parents if self.nodes[p].type == "foreach"
            ]
            if [f for f in foreaches if self.nodes[f].matching_join != node.name]:
                node.is_inside_foreach = True

    def _traverse_graph(self):
        """
        Walk the graph depth-first from "start", filling in `sorted_nodes` and
        each node's `in_funcs`, `split_parents`, `split_branches` and
        `matching_join`.
        """

        def traverse(node, seen, split_parents, split_branches):
            add_split_branch = False
            # A node reached again moves to the end of the ordering
            try:
                self.sorted_nodes.remove(node.name)
            except ValueError:
                pass
            self.sorted_nodes.append(node.name)
            if node.type in ("split", "foreach"):
                node.split_parents = split_parents
                node.split_branches = split_branches
                add_split_branch = True
                split_parents = split_parents + [node.name]
            elif node.type == "split-switch":
                node.split_parents = split_parents
                node.split_branches = split_branches
            elif node.type == "join":
                # ignore joins without splits
                if split_parents:
                    self[split_parents[-1]].matching_join = node.name
                    node.split_parents = split_parents
                    node.split_branches = split_branches[:-1]
                    split_parents = split_parents[:-1]
                    split_branches = split_branches[:-1]
            else:
                node.split_parents = split_parents
                node.split_branches = split_branches

            for n in node.out_funcs:
                # graph may contain loops - ignore them
                if n not in seen:
                    # graph may contain unknown transitions - ignore them
                    if n in self:
                        child = self[n]
                        child.in_funcs.add(node.name)
                        traverse(
                            child,
                            seen + [n],
                            split_parents,
                            split_branches + ([n] if add_split_branch else []),
                        )

        if "start" in self:
            traverse(self["start"], [], [], [])

        # fix the order of in_funcs
        for node in self.nodes.values():
            node.in_funcs = sorted(node.in_funcs)

    def __getitem__(self, x):
        return self.nodes[x]

    def __contains__(self, x):
        return x in self.nodes

    def __iter__(self):
        return iter(self.nodes.values())

    def __str__(self):
        return "\n".join(str(self[n]) for n in self.sorted_nodes)

    def output_dot(self):
        """Return the graph as text in Graphviz DOT format."""

        def edge_specs():
            for node in self.nodes.values():
                if node.type == "split-switch":
                    # Label edges for switch cases
                    for case_value, step_name in node.switch_cases.items():
                        yield (
                            '{0} -> {1} [label="{2}" color="blue" fontcolor="blue"];'.format(
                                node.name, step_name, case_value
                            )
                        )
                else:
                    for edge in node.out_funcs:
                        yield "%s -> %s;" % (node.name, edge)

        def node_specs():
            for node in self.nodes.values():
                if node.type == "split-switch":
                    # Hexagon shape for switch nodes
                    condition_label = (
                        f"switch: {node.condition}" if node.condition else "switch"
                    )
                    # The label is a Graphviz HTML-like label: <br/> puts the
                    # condition on its own line below the step name. The
                    # literal must stay on one line to be a valid string.
                    yield (
                        '"{0.name}" '
                        "[ label = <{0.name}<br/>{condition}> "
                        ' fontname = "Helvetica" '
                        ' shape = "hexagon" '
                        ' style = "filled" fillcolor = "lightgreen" ];'
                    ).format(node, condition=condition_label)
                else:
                    nodetype = "join" if node.num_args > 1 else node.type
                    yield (
                        '"{0.name}"'
                        "[ label = <{0.name} | {type}> "
                        ' fontname = "Helvetica" '
                        ' shape = "record" ];'
                    ).format(node, type=nodetype)

        return (
            "digraph {0.name} {{\n"
            "{nodes}\n"
            "{edges}\n"
            "}}".format(
                self, nodes="\n".join(node_specs()), edges="\n".join(edge_specs())
            )
        )

    def output_steps(self):
        """
        Describe the graph as plain data.

        Returns
        -------
        Tuple[Dict[str, Dict], List]
            `steps_info` maps each step name reached from "start" (plus "end")
            to a dictionary describing the step; `graph_structure` lists the
            step names from "start" to "end", with the branches of a split
            nested as a list of lists after the split's name.
        """
        steps_info = {}
        graph_structure = []

        def node_to_type(node):
            if node.type in ["linear", "start", "end", "join"]:
                return node.type
            elif node.type == "split":
                return "split-static"
            elif node.type == "foreach":
                if node.parallel_foreach:
                    return "split-parallel"
                return "split-foreach"
            elif node.type == "split-switch":
                return "split-switch"
            return "unknown"  # Should never happen

        def node_to_dict(name, node):
            d = {
                "name": name,
                "type": node_to_type(node),
                "line": node.func_lineno,
                "source_file": node.source_file,
                "doc": node.doc,
                "decorators": [
                    {
                        "name": deco.name,
                        "attributes": to_pod(deco.attributes),
                        "statically_defined": deco.statically_defined,
                        "inserted_by": deco.inserted_by,
                    }
                    for deco in node.decorators
                    if not deco.name.startswith("_")
                ]
                + [
                    {
                        "name": deco.decorator_name,
                        "attributes": {"_args": deco._args, **deco._kwargs},
                        "statically_defined": deco.statically_defined,
                        "inserted_by": deco.inserted_by,
                    }
                    for deco in chain(node.wrappers, node.config_decorators)
                ],
                "next": node.out_funcs,
            }
            if d["type"] == "split-foreach":
                d["foreach_artifact"] = node.foreach_param
            elif d["type"] == "split-parallel":
                d["num_parallel"] = node.num_parallel
            elif d["type"] == "split-switch":
                d["condition"] = node.condition
                d["switch_cases"] = node.switch_cases
            if node.matching_join:
                d["matching_join"] = node.matching_join
            return d

        def populate_block(start_name, end_name):
            cur_name = start_name
            resulting_list = []
            while cur_name != end_name:
                cur_node = self.nodes[cur_name]
                node_dict = node_to_dict(cur_name, cur_node)

                steps_info[cur_name] = node_dict
                resulting_list.append(cur_name)

                node_type = node_to_type(cur_node)
                if node_type in ("split-static", "split-foreach"):
                    # Each branch is described up to the join matching the split
                    resulting_list.append(
                        [
                            populate_block(s, cur_node.matching_join)
                            for s in cur_node.out_funcs
                        ]
                    )
                    cur_name = cur_node.matching_join
                elif node_type == "split-switch":
                    # Each case is described up to the end of the current
                    # block; a case pointing back at the switch is left out.
                    all_paths = [
                        populate_block(s, end_name)
                        for s in cur_node.out_funcs
                        if s != cur_name
                    ]
                    resulting_list.append(all_paths)
                    cur_name = end_name
                else:
                    # handles only linear, start, and join steps.
                    if cur_node.out_funcs:
                        cur_name = cur_node.out_funcs[0]
                    else:
                        # handles terminal nodes or when we jump to 'end_name'.
                        break
            return resulting_list

        graph_structure = populate_block("start", "end")
        steps_info["end"] = node_to_dict("end", self.nodes["end"])
        graph_structure.append("end")

        return steps_info, graph_structure
================================================
FILE: metaflow/includefile.py
================================================
from collections import namedtuple
import gzip
import importlib
import io
import json
import os
from hashlib import sha1
from typing import Any, Callable, Dict, Optional, Union
from metaflow._vendor import click
from metaflow._vendor import yaml
from .exception import MetaflowException
from .parameters import (
DelayedEvaluationParameter,
DeployTimeField,
Parameter,
ParameterContext,
)
from .plugins import DATACLIENTS
from .user_configs.config_options import ConfigInput
from .util import get_username
import functools
# _tracefunc_depth = 0
# def tracefunc(func):
# """Decorates a function to show its trace."""
# @functools.wraps(func)
# def tracefunc_closure(*args, **kwargs):
# global _tracefunc_depth
# """The closure."""
# print(f"{_tracefunc_depth}: {func.__name__}(args={args}, kwargs={kwargs})")
# _tracefunc_depth += 1
# result = func(*args, **kwargs)
# _tracefunc_depth -= 1
# print(f"{_tracefunc_depth} => {result}")
# return result
# return tracefunc_closure
# Bundle of the values that the deferred upload created in FilePathClass.convert
# needs: flow name, local path, text/encoding settings, datastore type and logger.
_DelayedExecContext = namedtuple(
    "_DelayedExecContext", "flow_name path is_text encoding handler_type echo"
)

# From here on out, this is the IncludeFile implementation.

# Data clients indexed by their TYPE attribute.
_dict_dataclients = {d.TYPE: d for d in DATACLIENTS}
class IncludedFile(object):
    """
    Thin wrapper around the descriptor of an included file.

    It signals to the Metaflow client that the object is special and must be
    handled as an IncludedFile when returned (i.e. the actual content has to be
    fetched through the uploader named in the descriptor).
    """

    # @tracefunc
    def __init__(self, descriptor: Dict[str, Any]):
        self._descriptor = descriptor
        self._cached_size = None

    @property
    def descriptor(self):
        """The raw descriptor dictionary this object wraps."""
        return self._descriptor

    @property
    # @tracefunc
    def size(self):
        """
        Size of the file as reported by the uploader matching the descriptor's type.

        The value is computed once and cached. Raises MetaflowException when no
        uploader is registered for the descriptor's type.
        """
        if self._cached_size is None:
            uploader = UPLOADERS.get(self.descriptor.get("type", None), None)
            if uploader is None:
                raise MetaflowException(
                    "Could not interpret size of IncludedFile: %s"
                    % json.dumps(self.descriptor)
                )
            self._cached_size = uploader.size(self._descriptor)
        return self._cached_size

    # @tracefunc
    def decode(self, name, var_type="Artifact"):
        """
        Load and return the file content through the uploader matching the descriptor.

        `name` and `var_type` are only used to build the error message raised
        (MetaflowException) when no uploader handles the descriptor's type.
        """
        uploader = UPLOADERS.get(self.descriptor.get("type", None), None)
        if uploader is not None:
            return uploader.load(self._descriptor)
        raise MetaflowException(
            "%s '%s' could not be loaded (IncludedFile) because no handler found: %s"
            % (var_type, name, json.dumps(self.descriptor))
        )
class FilePathClass(click.ParamType):
    """
    Click parameter type for IncludeFile values.

    `convert` returns either an `IncludedFile` (the value already describes an
    uploaded file) or a `DelayedEvaluationParameter` (a local path whose upload
    is deferred).
    """

    name = "FilePath"

    def __init__(self, is_text, encoding):
        self._is_text = is_text
        self._encoding = encoding

    def convert(self, value, param, ctx):
        # Click can call convert multiple times, so we need to make sure to only
        # convert once. This function will return a DelayedEvaluationParameter
        # (if it needs to still perform an upload) or an IncludedFile if not
        already_converted = isinstance(
            value, (DelayedEvaluationParameter, IncludedFile)
        )
        if already_converted:
            return value

        # Value will be a string containing one of two things:
        #  - Scenario A: a JSON blob indicating that the file has already been
        #    uploaded. This scenario happens as follows:
        #      + `step-functions create` is called and the IncludeFile has a default
        #        value. At the time of creation, the file is uploaded and a URL is
        #        returned; this URL is packaged in a blob by Uploader and passed to
        #        step-functions as the value of the parameter.
        #      + when the step function actually runs, the value is passed to click
        #        through METAFLOW_INIT_XXX; this value is the one returned above
        #  - Scenario B: A path. The path can either be:
        #      + B.1: <prefix>://<path> like s3://foo/bar or local:///foo/bar
        #        (right now, we are disabling support for this because the artifact
        #        can change unlike all other artifacts. It is trivial to re-enable)
        #      + B.2: an actual path to a local file like /foo/bar
        # In the first case, we just store an *external* reference to it (so we
        # won't upload anything). In the second case we will want to upload
        # something, but we only do that in the DelayedEvaluationParameter step.

        # ctx can be one of two things:
        #  - the click context (when called normally)
        #  - the ParameterContext (when called through _eval_default)
        # If not a ParameterContext, we convert it to that
        if not isinstance(ctx, ParameterContext):
            click_obj = ctx.obj
            ctx = ParameterContext(
                flow_name=click_obj.flow.name,
                user_name=get_username(),
                parameter_name=param.name,
                logger=click_obj.echo,
                ds_type=click_obj.datastore_impl.TYPE,
                configs=None,
            )

        if len(value) > 0 and value.startswith(("{", '"{')):
            # This is a blob; no URL starts with `{`. We are thus in scenario A
            try:
                value = json.loads(value)
                if not isinstance(value, dict):
                    # A quoted JSON string needs a second decoding pass
                    value = json.loads(value)
            except json.JSONDecodeError as e:
                raise MetaflowException(
                    "IncludeFile '%s' (value: %s) is malformed" % (param.name, value)
                )
            # All processing has already been done, so we just wrap the descriptor
            return IncludedFile(value)

        path = os.path.expanduser(value)
        prefix_pos = path.find("://")
        if prefix_pos > 0:
            # Scenario B.1
            raise MetaflowException(
                "IncludeFile using a direct reference to a file in cloud storage is no "
                "longer supported. Contact the Metaflow team if you need this supported"
            )
            # if _dict_dataclients.get(path[:prefix_pos]) is None:
            #     self.fail(
            #         "IncludeFile: no handler for external file of type '%s' "
            #         "(given path is '%s')" % (path[:prefix_pos], path)
            #     )
            # # We don't need to do anything more -- the file is already uploaded so we
            # # just return a blob indicating how to get the file.
            # return IncludedFile(
            #     CURRENT_UPLOADER.encode_url(
            #         "external", path, is_text=self._is_text, encoding=self._encoding
            #     )
            # )

        # Scenario B.2
        # Check if this is a valid local file
        try:
            with open(path, mode="r") as _:
                pass
        except OSError:
            self.fail("IncludeFile: could not open file '%s' for reading" % path)

        if _dict_dataclients.get(ctx.ds_type) is None:
            self.fail(
                "IncludeFile: no data-client for datastore of type '%s'" % ctx.ds_type
            )

        # Now that we have done preliminary checks, we will delay uploading it
        # until later (so it happens after PyLint checks the flow, but we prepare
        # everything for it)
        lambda_ctx = _DelayedExecContext(
            flow_name=ctx.flow_name,
            path=path,
            is_text=self._is_text,
            encoding=self._encoding,
            handler_type=ctx.ds_type,
            echo=ctx.logger,
        )

        def _delayed_eval_func(ctx=lambda_ctx, return_str=False):
            descriptor = CURRENT_UPLOADER.store(
                ctx.flow_name,
                ctx.path,
                ctx.is_text,
                ctx.encoding,
                _dict_dataclients[ctx.handler_type],
                ctx.echo,
            )
            incl_file = IncludedFile(descriptor)
            return json.dumps(incl_file.descriptor) if return_str else incl_file

        return DelayedEvaluationParameter(
            ctx.parameter_name,
            "default",
            functools.partial(_delayed_eval_func, ctx=lambda_ctx),
        )

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return "FilePath"
class IncludeFile(Parameter):
    """
    Includes a local file as a parameter for the flow.

    `IncludeFile` behaves like `Parameter` except that it reads its value from a file instead of
    the command line. The user provides a path to a file on the command line. The file contents
    are saved as a read-only artifact which is available in all steps of the flow.

    Parameters
    ----------
    name : str
        User-visible parameter name.
    default : Union[str, Callable[ParameterContext, str]]
        Default path to a local file. A function
        implies that the parameter corresponds to a *deploy-time parameter*.
    is_text : bool, optional, default None
        Convert the file contents to a string using the provided `encoding`.
        If False, the artifact is stored in `bytes`. A value of None is equivalent to
        True.
    encoding : str, optional, default None
        Use this encoding to decode the file contents if `is_text=True`. A value of None
        is equivalent to "utf-8".
    required : bool, optional, default None
        Require that the user specified a value for the parameter.
        `required=True` implies that the `default` is not used. A value of None is
        equivalent to False
    help : str, optional
        Help text to show in `run --help`.
    show_default : bool, default True
        If True, show the default value in the help text. A value of None is equivalent
        to True.
    parser : Union[str, Callable[[str], Any]], optional, default None
        If a callable, it is a function that can parse the file contents
        into any desired format. If a string, the string should refer to
        a function (like "my_parser_package.my_parser.my_parser_function") which should
        be able to parse the file contents. If the name starts with a ".", it is assumed
        to be relative to "metaflow".
    """

    def __init__(
        self,
        name: str,
        required: Optional[bool] = None,
        is_text: Optional[bool] = None,
        encoding: Optional[str] = None,
        help: Optional[str] = None,
        parser: Optional[Union[str, Callable[[str], Any]]] = None,
        **kwargs: Dict[str, str]
    ):
        # Only values given explicitly (i.e. not None) count as overrides.
        overrides = {}
        if is_text is not None:
            overrides["is_text"] = is_text
        if encoding is not None:
            overrides["encoding"] = encoding
        self._includefile_overrides = overrides
        self._parser = parser

        # NOTA: Right now, there is an issue where these can't be overridden by config
        # in all circumstances. Ignoring for now.
        file_type = FilePathClass(
            overrides.get("is_text", True),
            overrides.get("encoding", "utf-8"),
        )
        super().__init__(
            name,
            required=required,
            help=help,
            type=file_type,
            **kwargs,
        )

    def init(self, ignore_errors=False):
        super().init(ignore_errors)

        # `is_text`/`encoding` are always removed from kwargs; the explicit
        # constructor arguments win, then the kwargs values, then True/"utf-8".
        kwargs_is_text = self.kwargs.pop("is_text", True)
        is_text = self._includefile_overrides.get("is_text", kwargs_is_text)
        kwargs_encoding = self.kwargs.pop("encoding", "utf-8")
        encoding = self._includefile_overrides.get("encoding", kwargs_encoding)

        # If a default is specified, it needs to be uploaded when the flow is deployed
        # (for example when doing a `step-functions create`) so we make the default
        # be a DeployTimeField. This means that it will be evaluated in two cases:
        #  - by deploy_time_eval for `step-functions create` and related.
        #  - by Click when evaluating the parameter.
        #
        # In the first case, we will need to fully upload the file whereas in the
        # second case, we can just return the string as the FilePath.convert method
        # will take care of evaluating things.
        default = self.kwargs.get("default")
        if default is None:
            return

        # If the default is a callable, we have two DeployTimeField:
        #  - the callable nature of the default will require us to "call" the default
        #    (so that is the outer DeployTimeField)
        #  - IncludeFile defaults are always DeployTimeFields (since they need to be
        #    uploaded)
        #
        # Therefore, if the default value is itself a callable, we will have
        # a DeployTimeField (upload the file) wrapping another DeployTimeField
        # (call the default)
        if callable(default) and not isinstance(default, DeployTimeField):
            # If default is a callable, make it a DeployTimeField (the inner one)
            default = DeployTimeField(
                self.name, str, "default", default, return_str=True
            )
        self.kwargs["default"] = DeployTimeField(
            self.name,
            str,
            "default",
            IncludeFile._eval_default(is_text, encoding, default),
            print_representation=default,
        )

    def load_parameter(self, v):
        """
        Return the content of the included file `v`, run through the parser if one
        was configured. `None` is passed through unchanged.
        """
        if v is None:
            return v

        # Get the raw content from the file
        content = v.decode(self.name, var_type="Parameter")
        if self._parser is None:
            return content

        # A parser is specified: use it to parse the content
        try:
            return ConfigInput._call_parser(self._parser, content, True)
        except Exception as e:
            raise MetaflowException(
                "Failed to parse content in parameter '%s' using parser: %s"
                % (self.name, str(e))
            ) from e

    @staticmethod
    def _eval_default(is_text, encoding, default_path):
        # NOTE: If changing name of this function, check comments that refer to it to
        # update it.
        def do_eval(ctx, deploy_time):
            if isinstance(default_path, DeployTimeField):
                resolved = default_path(deploy_time=deploy_time)
            else:
                resolved = default_path
            if not deploy_time:
                return resolved

            converted = FilePathClass(is_text, encoding).convert(resolved, None, ctx)
            if isinstance(converted, DelayedEvaluationParameter):
                converted = converted()
            # At this point this is an IncludedFile, but we need to make it
            # into a string so that it can be properly saved.
            return json.dumps(converted.descriptor)

        return do_eval
class UploaderV1:
    """
    First-generation uploader for included files.

    Descriptors produced here are dictionaries with at least a "type" and a "url"
    key; `store` additionally records "is_text" and "encoding".
    """

    file_type = "uploader-v1"

    @classmethod
    def encode_url(cls, url_type, url, **kwargs):
        """Build a descriptor from `url_type`, `url` and any extra key/values."""
        return_value = {"type": url_type, "url": url}
        return_value.update(kwargs)
        return return_value

    @classmethod
    def store(cls, flow_name, path, is_text, encoding, handler, echo):
        """
        Gzip the local file at `path`, upload it through `handler` and return its
        descriptor.

        The destination is `<handler root>/<flow_name>/<sha1 of the raw content>`.
        `echo` is called with a one-line progress message. Raises MetaflowException
        when the file cannot be read.
        """
        sz = os.path.getsize(path)
        unit = ["B", "KB", "MB", "GB", "TB"]
        pos = 0
        # Stop at the largest known unit so that unit[pos] stays in range even for
        # sizes of 1024 TB and above.
        while pos < len(unit) - 1 and sz >= 1024:
            sz = sz // 1024
            pos += 1
        extra = "(this may take a while)" if pos >= 3 else ""
        echo("Including file %s of size %d%s %s" % (path, sz, unit[pos], extra))
        try:
            # Context manager guarantees the descriptor is closed even if read fails
            with io.open(path, mode="rb") as local_file:
                input_file = local_file.read()
        except IOError:
            # If we get an error here, since we know that the file exists already,
            # it means that read failed which happens with Python 2.7 for large files
            raise MetaflowException(
                "Cannot read file at %s -- this is likely because it is too "
                "large to be properly handled by Python 2.7" % path
            )
        sha = sha1(input_file).hexdigest()
        path = os.path.join(handler.get_root_from_config(echo, True), flow_name, sha)
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode="wb", compresslevel=3) as f:
            f.write(input_file)
        buf.seek(0)
        with handler() as client:
            url = client.put(path, buf.getvalue(), overwrite=False)
        return cls.encode_url(cls.file_type, url, is_text=is_text, encoding=encoding)

    @classmethod
    def size(cls, descriptor):
        """
        Return the size reported by the data client for the descriptor's URL.

        Raises FileNotFoundError when the object does not exist.
        """
        # We never have the size so we look it up
        url = descriptor["url"]
        handler = cls._get_handler(url)
        with handler() as client:
            obj = client.info(url, return_missing=True)
            if obj.exists:
                return obj.size
        raise FileNotFoundError("File at '%s' does not exist" % url)

    @classmethod
    def load(cls, descriptor):
        """
        Fetch the file referenced by `descriptor` and return its content.

        Returns `str` when `descriptor["is_text"]` is truthy (decoded with
        `descriptor.get("encoding")`), `bytes` otherwise. Raises FileNotFoundError
        when the object does not exist.
        """
        url = descriptor["url"]
        handler = cls._get_handler(url)
        with handler() as client:
            obj = client.get(url, return_missing=True)
            if obj.exists:
                if descriptor["type"] == cls.file_type:
                    # We saved this file directly, so we know how to read it out
                    with gzip.GzipFile(filename=obj.path, mode="rb") as f:
                        if descriptor["is_text"]:
                            with io.TextIOWrapper(
                                f, encoding=descriptor.get("encoding")
                            ) as text_file:
                                return text_file.read()
                        return f.read()
                else:
                    # We open this file according to the is_text and encoding
                    # information
                    if descriptor["is_text"]:
                        with io.open(
                            obj.path, mode="rt", encoding=descriptor.get("encoding")
                        ) as text_file:
                            return text_file.read()
                    with io.open(obj.path, mode="rb") as raw_file:
                        return raw_file.read()
        raise FileNotFoundError("File at '%s' does not exist" % descriptor["url"])

    @staticmethod
    def _get_handler(url):
        """
        Return the data client registered for the `<prefix>://` part of `url`.

        Raises MetaflowException for a URL without `://` or an unknown prefix.
        """
        prefix_pos = url.find("://")
        if prefix_pos < 0:
            raise MetaflowException("Malformed URL: '%s'" % url)
        prefix = url[:prefix_pos]
        handler = _dict_dataclients.get(prefix)
        if handler is None:
            raise MetaflowException("Could not find data client for '%s'" % prefix)
        return handler
class UploaderV2:
    """
    Second-generation uploader for included files.

    Descriptors carry "type" set to this uploader's `file_type` and a "sub-type"
    ("uploaded" for files stored by `store`); uploaded files also record "size".
    """

    file_type = "uploader-v2"

    @classmethod
    def encode_url(cls, url_type, url, **kwargs):
        """Build a V2 descriptor; `url_type` is recorded as the "sub-type"."""
        return_value = {
            "note": "Internal representation of IncludeFile",
            "type": cls.file_type,
            "sub-type": url_type,
            "url": url,
        }
        return_value.update(kwargs)
        return return_value

    @classmethod
    def store(cls, flow_name, path, is_text, encoding, handler, echo):
        """
        Upload the local file at `path` via `UploaderV1.store` and return a V2
        descriptor that also records the size of the local file.
        """
        r = UploaderV1.store(flow_name, path, is_text, encoding, handler, echo)
        # In V2, we store size for faster access
        r["note"] = "Internal representation of IncludeFile"
        r["type"] = cls.file_type
        r["sub-type"] = "uploaded"
        r["size"] = os.stat(path).st_size
        return r

    @classmethod
    def size(cls, descriptor):
        """
        Return the file size: the recorded "size" for uploaded files, otherwise the
        size reported by the data client. Raises FileNotFoundError when the object
        does not exist.
        """
        if descriptor["sub-type"] == "uploaded":
            return descriptor["size"]

        # This was a file that was external, so we get information on it
        url = descriptor["url"]
        handler = cls._get_handler(url)
        with handler() as client:
            obj = client.info(url, return_missing=True)
            if obj.exists:
                return obj.size
        raise FileNotFoundError(
            "%s file at '%s' does not exist"
            % (descriptor["sub-type"].capitalize(), url)
        )

    @classmethod
    def load(cls, descriptor):
        """
        Fetch the file referenced by `descriptor` and return its content.

        Returns `str` when `descriptor["is_text"]` is truthy (decoded with
        `descriptor.get("encoding")`), `bytes` otherwise. Raises FileNotFoundError
        when the object does not exist.
        """
        url = descriptor["url"]
        # We know the URL is in a <prefix>://<path> format so we just extract the
        # handler
        handler = cls._get_handler(url)
        with handler() as client:
            obj = client.get(url, return_missing=True)
            if obj.exists:
                if descriptor["sub-type"] == "uploaded":
                    # We saved this file directly, so we know how to read it out
                    with gzip.GzipFile(filename=obj.path, mode="rb") as f:
                        if descriptor["is_text"]:
                            with io.TextIOWrapper(
                                f, encoding=descriptor.get("encoding")
                            ) as text_file:
                                return text_file.read()
                        return f.read()
                else:
                    # We open this file according to the is_text and encoding
                    # information; context managers make sure handles get closed
                    if descriptor["is_text"]:
                        with io.open(
                            obj.path, mode="rt", encoding=descriptor.get("encoding")
                        ) as text_file:
                            return text_file.read()
                    with io.open(obj.path, mode="rb") as raw_file:
                        return raw_file.read()
        # If we are here, the file does not exist
        raise FileNotFoundError(
            "%s file at '%s' does not exist"
            % (descriptor["sub-type"].capitalize(), url)
        )

    @staticmethod
    def _get_handler(url):
        """Resolve the data client for `url` using UploaderV1's lookup."""
        return UploaderV1._get_handler(url)
# Maps the "type" field of a file descriptor to the uploader class used to read it
# back (see IncludedFile.size and IncludedFile.decode).
UPLOADERS = {
    "uploader-v1": UploaderV1,
    "external": UploaderV1,
    "uploader-v2": UploaderV2,
}

# Uploader used by FilePathClass.convert when storing a new file.
CURRENT_UPLOADER = UploaderV2
================================================
FILE: metaflow/integrations.py
================================================
# This file can contain "shortcuts" to other parts of Metaflow (integrations)
# This is an alternative to providing an extension package where you would define
# these aliases in the toplevel file.
# It follows a similar pattern to plugins so that these integration aliases can be
# turned on and off, which avoids exposing things that are not necessarily needed/wanted.
from metaflow.extension_support.integrations import process_integration_aliases
# To enable an alias `metaflow.integrations.get_s3_client` to
# `metaflow.plugins.aws.aws_client.get_aws_client`, use the following:
#
# ALIASES_DESC = [("get_s3_client", ".plugins.aws.aws_client.get_aws_client")]
#
# ALIASES_DESC is a list of tuples:
# - name: name of the integration alias
# - obj: object it points to
#
ALIASES_DESC = [("ArgoEvent", ".plugins.argo.argo_events.ArgoEvent")]

# Aliases can be enabled or disabled through configuration or extensions:
#  - ENABLED_INTEGRATION_ALIAS: list of alias names to enable.
#  - TOGGLE_INTEGRATION_ALIAS: if ENABLED_INTEGRATION_ALIAS is not set anywhere
#    (environment variable, configuration or extensions), list of integration aliases
#    to toggle (+<name> or <name> enables, -<name> disables) to build
#    ENABLED_INTEGRATION_ALIAS from the concatenation of the names in
#    ALIASES_DESC (concatenation of the names here as well as in the extensions).

# Keep this line and make sure ALIASES_DESC is above this line.
process_integration_aliases(globals())
================================================
FILE: metaflow/lint.py
================================================
import re
from .exception import MetaflowException
from .util import all_equal
class LintWarn(MetaflowException):
    # Exception raised by the lint checks in this module when a flow graph fails
    # one of them.
    headline = "Validity checker found an issue"
class FlowLinter(object):
    """
    Registry of graph checks, each tagged with the settings that enable it.

    A check is registered with `check` and then tagged with one or more
    `ensure_*` decorators. `run_checks` runs every check for which at least one
    tagged setting is truthy, either on the linter or in the keyword arguments.
    """

    def __init__(self):
        # Settings that are on by default
        self.require_static_graph = True
        self.require_fundamentals = True
        self.require_acyclicity = True
        # Off by default; can be requested per call through run_checks(**kwargs)
        self.require_non_nested_foreach = False
        self._checks = []

    def _decorate(self, setting, f):
        # `f.attrs` is created by `check`, which therefore has to be applied first
        # (i.e. be the innermost decorator).
        f.attrs.append(setting)
        return f

    def ensure_static_graph(self, f):
        return self._decorate("require_static_graph", f)

    def ensure_fundamentals(self, f):
        return self._decorate("require_fundamentals", f)

    def ensure_acyclicity(self, f):
        return self._decorate("require_acyclicity", f)

    def ensure_non_nested_foreach(self, f):
        return self._decorate("require_non_nested_foreach", f)

    def check(self, f):
        """Register `f` as a check and give it an empty list of settings."""
        self._checks.append(f)
        f.attrs = []
        return f

    def run_checks(self, graph, **kwargs):
        """Run, in registration order, every check enabled for this call."""

        def is_enabled(setting):
            return getattr(self, setting) or kwargs.get(setting)

        for registered in self._checks:
            if any(map(is_enabled, registered.attrs)):
                registered(graph)
# Module-level linter; the check functions below register themselves on it through
# its `check` and `ensure_*` decorators.
linter = FlowLinter()
@linter.ensure_fundamentals
@linter.check
def check_reserved_words(graph):
    """Raise LintWarn for the first step whose name is a reserved word."""
    reserved_names = {"name", "next", "input", "index", "cmd"}
    template = "Step name *%s* is a reserved word. Choose another name for the step."
    for step in graph:
        if step.name not in reserved_names:
            continue
        raise LintWarn(template % step.name, step.func_lineno, step.source_file)
@linter.ensure_fundamentals
@linter.check
def check_basic_steps(graph):
    """Raise LintWarn when the graph lacks a `start` or an `end` step."""
    template = "Add %s *%s* step in your flow."
    required_steps = (("a", "start"), ("an", "end"))
    for article, step_name in required_steps:
        if step_name in graph:
            continue
        raise LintWarn(template % (article, step_name))
@linter.ensure_static_graph
@linter.check
def check_that_end_is_end(graph):
    """Raise LintWarn when `end` has a transition or takes more than one argument."""
    has_next_msg = (
        "The *end* step should not have a step.next() transition. Just remove it."
    )
    is_join_msg = (
        "The *end* step should not be a join step (it gets an extra "
        "argument). Add a join step before it."
    )
    end_step = graph["end"]
    # The transition check comes first, so it wins when both problems are present.
    problem = None
    if end_step.has_tail_next or end_step.invalid_tail_next:
        problem = has_next_msg
    elif end_step.num_args > 1:
        problem = is_join_msg
    if problem is not None:
        raise LintWarn(problem, end_step.tail_next_lineno, end_step.source_file)
@linter.ensure_fundamentals
@linter.check
def check_step_names(graph):
    """
    Raise LintWarn for the first step whose name contains a character outside
    `a-z`, `0-9`, `_` or whose name starts with an underscore.
    """
    template = (
        "Step *{0.name}* has an invalid name. Only lowercase ascii "
        "characters, underscores, and digits are allowed."
    )
    for step in graph:
        has_bad_char = re.search("[^a-z0-9_]", step.name)
        if has_bad_char or step.name[0] == "_":
            raise LintWarn(template.format(step), step.func_lineno, step.source_file)
@linter.ensure_fundamentals
@linter.check
def check_num_args(graph):
    """
    Raise LintWarn for the first step with an unsupported number of arguments:
    more than two, two on a step whose type is not "join", or none at all.
    """
    too_many_msg = (
        "Step {0.name} has too many arguments. Normal steps take only "
        "'self' as an argument. Join steps take 'self' and 'inputs'."
    )
    join_and_split_msg = (
        "Step *{0.name}* is both a join step (it takes an extra argument) "
        "and a split step (it transitions to multiple steps). This is not "
        "allowed. Add a new step so that split and join become separate steps."
    )
    no_self_msg = "Step *{0.name}* is missing the 'self' argument."

    def pick_message(step):
        # Returns the applicable message template, or None when the step is fine.
        count = step.num_args
        if count > 2:
            return too_many_msg
        if count == 2 and step.type != "join":
            return join_and_split_msg
        if count == 0:
            return no_self_msg
        return None

    for step in graph:
        template = pick_message(step)
        if template is not None:
            raise LintWarn(template.format(step), step.func_lineno, step.source_file)
@linter.ensure_static_graph
@linter.check
def check_static_transitions(graph):
    """Raise LintWarn for the first non-end step without a trailing self.next()."""
    template = (
        "Step *{0.name}* is missing a self.next() transition to "
        "the next step. Add a self.next() as the last line in the "
        "function."
    )
    for step in graph:
        if step.type == "end" or step.has_tail_next:
            continue
        raise LintWarn(template.format(step), step.func_lineno, step.source_file)
@linter.ensure_static_graph
@linter.check
def check_valid_transitions(graph):
    """
    Raise LintWarn for the first non-end step whose trailing self.next() is
    flagged as invalid.
    """
    msg = (
        "Step *{0.name}* specifies an invalid self.next() transition. "
        "Make sure the self.next() expression matches with one of the "
        "supported transition types:\n"
        " • Linear: self.next(self.step_name)\n"
        " • Fan-out: self.next(self.step1, self.step2, ...)\n"
        " • Foreach: self.next(self.step, foreach='variable')\n"
        " • Switch: self.next({{\"key\": self.step, ...}}, condition='variable')\n\n"
        "For switch statements, keys must be string literals, numbers or config expressions "
        "(self.config.key_name), not variables."
    )
    for step in graph:
        if step.type == "end":
            continue
        if not step.has_tail_next:
            continue
        if step.invalid_tail_next:
            raise LintWarn(msg.format(step), step.tail_next_lineno, step.source_file)
@linter.ensure_static_graph
@linter.check
def check_unknown_transitions(graph):
    """Raise LintWarn for the first step that transitions to a step not in the graph."""
    template = (
        "Step *{0.name}* specifies a self.next() transition to "
        "an unknown step, *{step}*."
    )
    for step in graph:
        missing_targets = [
            target for target in step.out_funcs if target not in graph
        ]
        if not missing_targets:
            continue
        # Only the first unknown target is reported.
        first_missing = missing_targets[0]
        raise LintWarn(
            template.format(step, step=first_missing),
            step.tail_next_lineno,
            step.source_file,
        )
@linter.ensure_acyclicity
@linter.ensure_static_graph
@linter.check
def check_for_acyclicity(graph):
    """
    Raise LintWarn when following transitions from any step revisits a step
    already on the current path. A switch step transitioning to itself is ignored.
    """
    template = (
        "There is a loop in your flow: *{0}*. Break the loop "
        "by fixing self.next() transitions."
    )

    def walk(step, visited):
        # `visited` holds the names reached so far on this path (the step the walk
        # started from is not included until it is reached again).
        for successor in step.out_funcs:
            is_switch_self_loop = (
                step.type == "split-switch" and successor == step.name
            )
            if is_switch_self_loop:
                continue
            extended = visited + [successor]
            if successor in visited:
                raise LintWarn(
                    template.format("->".join(extended)),
                    step.tail_next_lineno,
                    step.source_file,
                )
            walk(graph[successor], extended)

    for origin in graph:
        walk(origin, [])
@linter.ensure_static_graph
@linter.check
def check_for_orphans(graph):
    """
    Raise LintWarn when a step cannot be reached from `start`.

    When several steps are unreachable, the one whose name sorts first is
    reported so that the message is the same on every run.
    """
    msg = (
        "Step *{0.name}* is unreachable from the start step. Add "
        "self.next({0.name}) in another step or remove *{0.name}*."
    )
    seen = {"start"}
    # Iterative traversal: avoids hitting the recursion limit on very long chains.
    pending = ["start"]
    while pending:
        node = graph[pending.pop()]
        for n in node.out_funcs:
            if n not in seen:
                seen.add(n)
                pending.append(n)

    orphans = frozenset(n.name for n in graph) - seen
    if orphans:
        # Set iteration order for strings varies between interpreter runs, so pick
        # the orphan deterministically.
        orphan = graph[min(orphans)]
        raise LintWarn(msg.format(orphan), orphan.func_lineno, orphan.source_file)
@linter.ensure_static_graph
@linter.check
def check_split_join_balance(graph):
    # Walks the graph from `start`, keeping a stack of the splits that are still
    # open, and raises LintWarn when:
    #  - `end` is reached while a split is still open (msg0),
    #  - a join does not receive exactly the branches of the innermost open
    #    split (msg1),
    #  - a join is reached while no split is open (msg2),
    #  - the steps leading into a join do not share the same split lineage (msg3).
    msg0 = (
        "Step *end* reached before a split started at step(s) *{roots}* "
        "were joined. Add a join step before *end*."
    )
    msg1 = (
        "Step *{0.name}* seems like a join step (it takes an extra input "
        "argument) but an incorrect number of steps (*{paths}*) lead to "
        "it. This join was expecting {num_roots} incoming paths, starting "
        "from split step(s) *{roots}*."
    )
    msg2 = (
        "Step *{0.name}* seems like a join step (it takes an extra input "
        "argument) but it is not preceded by a split. Ensure that there is "
        "a matching split for every join."
    )
    msg3 = (
        "Step *{0.name}* joins steps from unrelated splits. Ensure that "
        "there is a matching join for every split."
    )

    def traverse(node, split_stack):
        # `split_stack` holds one ("split", out_funcs) entry per split that is
        # still open on the path leading to `node`.
        if node.type in ("start", "linear"):
            new_stack = split_stack
        elif node.type in ("split", "foreach"):
            # Opening a split: push its branches (a new list is built, the
            # caller's stack is left untouched).
            new_stack = split_stack + [("split", node.out_funcs)]
        elif node.type == "split-switch":
            # For a switch, continue traversal down each path with the same stack
            for n in node.out_funcs:
                # Skip a switch case that points back at the switch step itself
                if node.type == "split-switch" and n == node.name:
                    continue
                traverse(graph[n], split_stack)
            return
        elif node.type == "end":
            new_stack = split_stack
            if split_stack:
                _, split_roots = split_stack.pop()
                roots = ", ".join(split_roots)
                raise LintWarn(
                    msg0.format(roots=roots), node.func_lineno, node.source_file
                )
        elif node.type == "join":
            new_stack = split_stack
            if split_stack:
                # The join closes the innermost open split
                _, split_roots = split_stack[-1]
                new_stack = split_stack[:-1]

                # Resolve each incoming function to its root branch from the split.
                resolved_branches = set(
                    graph[n].split_branches[-1] for n in node.in_funcs
                )

                # compares the set of resolved branches against the expected branches
                # from the split.
                if len(resolved_branches) != len(
                    split_roots
                ) or resolved_branches ^ set(split_roots):
                    paths = ", ".join(resolved_branches)
                    roots = ", ".join(split_roots)
                    raise LintWarn(
                        msg1.format(
                            node, paths=paths, num_roots=len(split_roots), roots=roots
                        ),
                        node.func_lineno,
                        node.source_file,
                    )
            else:
                raise LintWarn(msg2.format(node), node.func_lineno, node.source_file)

            # check that incoming steps come from the same lineage
            # (no cross joins)

            def parents(n):
                # For an incoming join, its last split parent is dropped before
                # comparing lineages.
                if graph[n].type == "join":
                    return tuple(graph[n].split_parents[:-1])
                else:
                    return tuple(graph[n].split_parents)

            if not all_equal(map(parents, node.in_funcs)):
                raise LintWarn(msg3.format(node), node.func_lineno, node.source_file)
        else:
            new_stack = split_stack

        for n in node.out_funcs:
            if node.type == "split-switch" and n == node.name:
                continue
            traverse(graph[n], new_stack)

    traverse(graph["start"], [])
@linter.ensure_static_graph
@linter.check
def check_switch_splits(graph):
    """Check conditional split constraints"""
    too_few_msg = (
        "Step *{0.name}* is a switch split but defines {num} transitions. "
        "Switch splits must define at least 2 transitions."
    )
    no_condition_msg = "Step *{0.name}* is a switch split but has no condition variable."
    no_cases_msg = "Step *{0.name}* is a switch split but has no switch cases defined."

    for step in graph:
        if step.type != "split-switch":
            continue
        num_transitions = len(step.out_funcs)
        # The first failing constraint, in this order, is the one reported.
        if num_transitions < 2:
            problem = too_few_msg.format(step, num=num_transitions)
        elif not step.condition:
            problem = no_condition_msg.format(step)
        elif not step.switch_cases:
            problem = no_cases_msg.format(step)
        else:
            continue
        raise LintWarn(problem, step.func_lineno, step.source_file)
@linter.ensure_static_graph
@linter.check
def check_empty_foreaches(graph):
    """Raise LintWarn for the first foreach step that transitions straight to a join."""
    template = (
        "Step *{0.name}* is a foreach split that has no children: "
        "it is followed immediately by a join step, *{join}*. Add "
        "at least one step between the split and the join."
    )
    for step in graph:
        if step.type != "foreach":
            continue
        direct_joins = [
            target for target in step.out_funcs if graph[target].type == "join"
        ]
        if direct_joins:
            raise LintWarn(
                template.format(step, join=direct_joins[0]),
                step.func_lineno,
                step.source_file,
            )
@linter.ensure_static_graph
@linter.check
def check_parallel_step_after_next(graph):
    """
    Raise LintWarn when a step reached through self.next(num_parallel=..) is not
    marked as a parallel step.

    The warning names (and points at) the target step that lacks the decorator,
    which is the step the message talks about, rather than the step containing
    the self.next(num_parallel=..) call.
    """
    msg = (
        "Step *{0.name}* is called as a parallel step with self.next(num_parallel=..) "
        "but does not have a @parallel decorator."
    )
    for node in graph:
        if not node.parallel_foreach:
            continue
        for out_name in node.out_funcs:
            target = graph[out_name]
            if not target.parallel_step:
                raise LintWarn(
                    msg.format(target), target.func_lineno, target.source_file
                )
@linter.ensure_static_graph
@linter.check
def check_join_followed_by_parallel_step(graph):
    """
    Raise LintWarn when the first transition of a parallel step does not lead to
    a join step.
    """
    template = (
        "An @parallel step should be followed by a join step. Step *{0}* is called "
        "after an @parallel step but is not a join step. Please add an extra `inputs` "
        "argument to the step."
    )
    for step in graph:
        if not step.parallel_step:
            continue
        follower = step.out_funcs[0]
        if graph[follower].type != "join":
            raise LintWarn(
                template.format(follower), step.func_lineno, step.source_file
            )
@linter.ensure_static_graph
@linter.check
def check_parallel_foreach_calls_parallel_step(graph):
    """
    Raise LintWarn when a parallel step is the target of a transition from a step
    that is not a parallel foreach.
    """
    template = (
        "Step *{0.name}* has a @parallel decorator, but is not called "
        "with self.next(num_parallel=...) from step *{1.name}*."
    )
    for step in graph:
        if not step.parallel_step:
            continue
        for caller in graph:
            calls_step = caller.out_funcs and step.name in caller.out_funcs
            if calls_step and not caller.parallel_foreach:
                raise LintWarn(
                    template.format(step, caller), step.func_lineno, step.source_file
                )
@linter.ensure_non_nested_foreach
@linter.check
def check_nested_foreach(graph):
    """Raise LintWarn for the first foreach step that has a foreach among its split parents."""
    template = (
        "Nested foreaches are not allowed: Step *{0.name}* is a foreach "
        "split that is nested under another foreach split."
    )
    for step in graph:
        if step.type != "foreach":
            continue
        for parent_name in step.split_parents:
            if graph[parent_name].type == "foreach":
                raise LintWarn(
                    template.format(step), step.func_lineno, step.source_file
                )
@linter.ensure_static_graph
@linter.check
def check_ambiguous_joins(graph):
    """Raise LintWarn for the first join step that is a direct target of a switch step."""
    template = (
        "A conditional path cannot lead directly to a join step.\n"
        "In your conditional step(s) {parents}, one or more of the possible paths transition directly to the join step {join_name}.\n"
        "As a workaround, please introduce an intermediate, unconditional step on that specific path before joining."
    )
    for step in graph:
        if step.type != "join":
            continue
        switch_parents = [
            parent for parent in step.in_funcs if graph[parent].type == "split-switch"
        ]
        if not switch_parents:
            continue
        text = template.format(
            parents=", ".join("*%s*" % parent for parent in switch_parents),
            join_name="*%s*" % step.name,
        )
        raise LintWarn(text, step.func_lineno, step.source_file)
================================================
FILE: metaflow/meta_files.py
================================================
# Sentinel meaning "the info file has not been read yet".
_UNINITIALIZED = object()
_info_file_content = _UNINITIALIZED


def read_info_file():
    """
    Return the result of `MetaflowCodeContent.get_info()`, computing it on the
    first call and reusing the stored result afterwards.
    """
    # Prevent circular import
    from .packaging_sys import MetaflowCodeContent

    global _info_file_content

    if _info_file_content is _UNINITIALIZED:
        _info_file_content = MetaflowCodeContent.get_info()
    return _info_file_content
================================================
FILE: metaflow/metadata_provider/__init__.py
================================================
from .metadata import DataArtifact, MetadataProvider, MetaDatum
================================================
FILE: metaflow/metadata_provider/heartbeat.py
================================================
import json
import time
from threading import Thread
import requests
from metaflow.exception import MetaflowException
from metaflow.metaflow_config import SERVICE_HEADERS
from metaflow.sidecar import Message, MessageTypes
HB_URL_KEY = "hb_url"
class HeartBeatException(MetaflowException):
    """Error type used in this module for failed heartbeat requests."""

    headline = "Metaflow heart beat error"

    def __init__(self, msg):
        # msg : human readable description of the failure
        super(HeartBeatException, self).__init__(msg)
class MetadataHeartBeat(object):
    """
    Worker that POSTs a heartbeat to a URL from a background thread.

    The URL arrives in the payload of the first non-shutdown message (under
    HB_URL_KEY); the daemon thread then keeps pinging it until the process
    exits.
    """

    def __init__(self):
        self.headers = SERVICE_HEADERS
        # Daemon thread so that it never keeps the process alive on its own
        self.req_thread = Thread(target=self._ping)
        self.req_thread.daemon = True
        # Used when the service does not return a usable wait time
        self.default_frequency_secs = 10
        self.hb_url = None

    def process_message(self, msg):
        # type: (Message) -> None
        """
        Handle one incoming message.

        A SHUTDOWN message triggers one final heartbeat attempt and nothing
        else. Any other message starts the ping thread if it is not running,
        using the URL found in the message payload.
        """
        if msg.msg_type == MessageTypes.SHUTDOWN:
            self._shutdown()
            # Do not fall through: a shutdown message must never start the
            # ping thread, and its payload is not expected to carry a URL.
            return
        if not self.req_thread.is_alive():
            # set post url
            self.hb_url = msg.payload[HB_URL_KEY]
            # start thread
            self.req_thread.start()

    @classmethod
    def get_worker(cls):
        """Return the class to use as the worker (this class itself)."""
        return cls

    def _ping(self):
        """
        Thread body: send heartbeats forever.

        After a successful heartbeat, sleeps for the wait time returned by
        `_heartbeat` (or `default_frequency_secs` if it is missing or not
        positive). After a HeartBeatException, prints it and backs off
        exponentially (1.5 ** consecutive failures, in seconds).
        """
        retry_counter = 0
        while True:
            try:
                frequency_secs = self._heartbeat()
                if frequency_secs is None or frequency_secs <= 0:
                    frequency_secs = self.default_frequency_secs
                time.sleep(frequency_secs)
                retry_counter = 0
            except HeartBeatException as e:
                print(e)
                retry_counter = retry_counter + 1
                time.sleep(1.5**retry_counter)

    def _heartbeat(self):
        """
        Send a single heartbeat.

        Returns
        -------
        The "wait_time_in_seconds" value from the response, or None when it
        is absent or when no heartbeat URL has been set yet.

        Raises
        ------
        HeartBeatException
            If the request fails or the response status is not 200.
        """
        if self.hb_url is not None:
            try:
                response = requests.post(
                    url=self.hb_url, data="{}", headers=self.headers.copy()
                )
            except requests.exceptions.ConnectionError:
                raise HeartBeatException(
                    "HeartBeat request (%s) failed" " (ConnectionError)" % (self.hb_url)
                )
            except requests.exceptions.Timeout:
                raise HeartBeatException(
                    "HeartBeat request (%s) failed" " (Timeout)" % (self.hb_url)
                )
            except requests.exceptions.RequestException as e:
                raise HeartBeatException(
                    "HeartBeat request (%s) failed"
                    " (RequestException) %s" % (self.hb_url, str(e))
                )
            # Unfortunately, response.json() returns a string that we need
            # to cast to json; however when the request encounters an error
            # the return type is a json blob :/
            if response.status_code == 200:
                return json.loads(response.json()).get("wait_time_in_seconds")
            else:
                raise HeartBeatException(
                    "HeartBeat request (%s) failed"
                    " (code %s): %s"
                    % (self.hb_url, response.status_code, response.text)
                )
        return None

    def _shutdown(self):
        # attempts sending one last heartbeat
        self._heartbeat()
================================================
FILE: metaflow/metadata_provider/metadata.py
================================================
import json
import os
import re
import time
from collections import namedtuple
from itertools import chain
from typing import List
from metaflow.exception import MetaflowInternalError, MetaflowTaggingError
from metaflow.tagging_util import validate_tag
from metaflow.util import get_username, resolve_identity_as_tuple, is_stringish
# Description of one stored artifact, as consumed by _artifacts_to_json below
DataArtifact = namedtuple("DataArtifact", "name ds_type ds_root url type sha")
# One metadata entry: field name, value, type and a list of tags
MetaDatum = namedtuple("MetaDatum", "field value type tags")
# Matches tags starting with "attempt_id:<digits>"; group 1 is the attempt number
attempt_id_re = re.compile(r"attempt_id:([0-9]+)")
class MetadataProviderMeta(type):
    """
    Metaclass giving every provider class a class-level `INFO` property.

    Reading `INFO` returns the stored value, falling back to (and storing)
    `default_info()` while the stored value is falsy. Assigning to `INFO`
    stores `compute_info(value)`.
    """

    def __new__(metaname, classname, bases, attrs):
        return type.__new__(metaname, classname, bases, attrs)

    def __init__(classobject, classname, bases, attrs):
        # Every class created through this metaclass starts without info
        classobject._INFO = None

    def _get_info(classobject):
        if classobject._INFO:
            return classobject._INFO
        classobject._INFO = classobject.default_info()
        return classobject._INFO

    def _set_info(classobject, val):
        classobject._INFO = classobject.compute_info(val)

    INFO = property(_get_info, _set_info)
# From https://stackoverflow.com/questions/22409430/portable-meta-class-between-python2-and-python3
def with_metaclass(mcls):
    """
    Class decorator that rebuilds the decorated class with metaclass `mcls`.

    The new class keeps the name, bases and body of the decorated class.
    """

    def decorator(cls):
        # These two entries are dropped from the copied body before rebuilding
        skipped = ("__dict__", "__weakref__")
        namespace = {
            key: value for key, value in vars(cls).items() if key not in skipped
        }
        return mcls(cls.__name__, cls.__bases__, namespace)

    return decorator
class ObjectOrder:
    """Fixed ordering of the object types and helpers to convert to/from it."""

    # Consider this list a constant that should never change.
    # Lots of code depend on the membership of this list as
    # well as exact ordering
    _order_as_list = [
        "root",
        "flow",
        "run",
        "step",
        "task",
        "artifact",
        "metadata",
        "self",
    ]
    _order_as_dict = {v: i for i, v in enumerate(_order_as_list)}

    @staticmethod
    def order_to_type(order):
        """
        Return the type name at position `order`, or None if out of range.

        Negative positions are out of range: without the lower bound they
        would silently wrap around through negative indexing.
        """
        if 0 <= order < len(ObjectOrder._order_as_list):
            return ObjectOrder._order_as_list[order]
        return None

    @staticmethod
    def type_to_order(obj_type):
        """Return the position of `obj_type`, or None if it is unknown."""
        return ObjectOrder._order_as_dict.get(obj_type)
@with_metaclass(MetadataProviderMeta)
class MetadataProvider(object):
TYPE = None
@classmethod
def metadata_str(cls):
    """
    Return a string of the form "<TYPE>@<INFO>" for this provider class.
    """
    return "%s@%s" % (cls.TYPE, cls.INFO)
@classmethod
def compute_info(cls, val):
    """
    Compute the new information for this provider

    The computed value should be returned and will then be accessible directly as cls.INFO.
    This information will be printed by the client when describing this metadata provider.
    This base implementation returns an empty string.

    Parameters
    ----------
    val : str
        Provider specific information used in computing the new information. For example, this
        can be a path.

    Returns
    -------
    str :
        Value to be set to INFO
    """
    return ""
@classmethod
def default_info(cls):
    """
    Returns the default information for this provider

    This should compute and return the default value for the information regarding this provider.
    For example, this can compute where the metadata is stored.
    This base implementation returns an empty string.

    Returns
    -------
    str
        Value to be set by default in INFO
    """
    return ""
def version(self):
    """
    Returns the version of this provider

    This base implementation returns an empty string.

    Returns
    -------
    str
        Version of the provider
    """
    return ""
def new_run_id(self, tags=None, sys_tags=None):
    """
    Creates an ID and registers this new run.

    The run ID will be unique within a given flow.
    Not implemented in this base class.

    Parameters
    ----------
    tags : list, optional
        Tags to apply to this particular run, by default None
    sys_tags : list, optional
        System tags to apply to this particular run, by default None

    Returns
    -------
    int
        Run ID for the run

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError()
def register_run_id(self, run_id, tags=None, sys_tags=None):
    """
    Registers a run whose ID was produced outside of this provider.

    Not implemented in this base class.

    Parameters
    ----------
    run_id : int
        Run ID for this run
    tags : list, optional
        Tags to apply to this particular run, by default None
    sys_tags : list, optional
        System tags to apply to this particular run, by default None

    Returns
    -------
    bool
        True if a new run was registered; False if it already existed

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError()
def new_task_id(self, run_id, step_name, tags=None, sys_tags=None):
    """
    Creates an ID and registers this new task.

    The task ID will be unique within a flow, run and step.
    Not implemented in this base class.

    Parameters
    ----------
    run_id : int
        ID of the run
    step_name : string
        Name of the step
    tags : list, optional
        Tags to apply to this particular task, by default None
    sys_tags : list, optional
        System tags to apply to this particular task, by default None

    Returns
    -------
    int
        Task ID for the task

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError()
def register_task_id(
    self, run_id, step_name, task_id, attempt=0, tags=None, sys_tags=None
):
    """
    Registers a task whose ID was produced outside of this provider.

    Not implemented in this base class.

    Parameters
    ----------
    run_id : int or convertible to int
        Run ID for this run
    step_name : string
        Name of the step
    task_id : int
        Task ID
    attempt : int, optional
        Attempt number for the task, by default 0
    tags : list, optional
        Tags to apply to this particular task, by default None
    sys_tags : list, optional
        System tags to apply to this particular task, by default None

    Returns
    -------
    bool
        True if a new task was registered; False if it already existed

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError()
def get_runtime_environment(self, runtime_name):
    """
    Build the environment variables contributed by this metadata provider.

    Parameters
    ----------
    runtime_name : string
        Name of the runtime for which to get the environment

    Returns
    -------
    dict[string] -> string
        Mapping with METAFLOW_RUNTIME_NAME set to `runtime_name` and USER
        set to the current username
    """
    env_vars = {}
    env_vars["METAFLOW_RUNTIME_NAME"] = runtime_name
    env_vars["USER"] = get_username()
    return env_vars
def register_data_artifacts(
    self, run_id, step_name, task_id, attempt_id, artifacts
):
    """
    Registers the fact that the data-artifacts are associated with
    the particular task.

    Artifacts produced by a given task can be associated with the
    task using this call. Not implemented in this base class.

    Parameters
    ----------
    run_id : int
        Run ID for the task
    step_name : string
        Step name for the task
    task_id : int
        Task ID for the task
    attempt_id : int
        Attempt for the task
    artifacts : List of DataArtifact
        Artifacts associated with this task

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError()
def register_metadata(self, run_id, step_name, task_id, metadata):
    """
    Registers metadata with a task.

    Note that the same metadata can be registered multiple times for the same task (for example
    by multiple attempts). Internally, the timestamp of when the registration call is made is
    also recorded allowing the user to determine the latest value of the metadata.
    Not implemented in this base class.

    Parameters
    ----------
    run_id : int
        Run ID for the task
    step_name : string
        Step name for the task
    task_id : int
        Task ID for the task
    metadata : List of MetaDatum
        Metadata associated with this task

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError()
def start_task_heartbeat(self, flow_id, run_id, step_name, task_id):
    """Hook for starting a task-level heartbeat; does nothing in this base class."""
    pass
def start_run_heartbeat(self, flow_id, run_id):
    """Hook for starting a run-level heartbeat; does nothing in this base class."""
    pass
def stop_heartbeat(self):
    """Hook for stopping a heartbeat; does nothing in this base class."""
    pass
@classmethod
def _get_object_internal(
    cls, obj_type, obj_order, sub_type, sub_order, filters, attempt, *args
):
    """
    Return objects for the implementation of this class

    See get_object for the description of what this function does.
    Not implemented in this base class.

    Parameters
    ----------
    obj_type : string
        One of 'root', 'flow', 'run', 'step', 'task', 'artifact'
    obj_order : int
        Order in the list ['root', 'flow', 'run', 'step', 'task', 'artifact']
    sub_type : string
        Same as obj_type with the addition of 'metadata', 'self'
    sub_order : int
        Order in the same list as the one for obj_order + ['metadata', 'self']
    filters : dict
        Dictionary with keys 'any_tags', 'tags' and 'system_tags'. If specified
        will return only objects that have the specified tags present. Filters
        are ANDed together so all tags must be present for the object to be returned.
    attempt : int or None
        If None, returns artifacts for latest *done* attempt and all metadata. Otherwise,
        returns artifacts for that attempt (existent, done or not) and *all* metadata
        NOTE: Unlike its external facing `get_object`, this function should
        return *all* metadata; the base class will properly implement the
        filter. For artifacts, this function should filter artifacts at
        the backend level.

    Return
    ------
    object or list :
        Depending on the call, the type of object return varies

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError()
def add_sticky_tags(self, tags=None, sys_tags=None):
    """
    Adds tags to be added to every run and task

    Tags can be added to record information about a run/task. Such tags can be specified on a
    per run or task basis using the new_run_id/register_run_id or new_task_id/register_task_id
    functions but can also be set globally using this function. Tags added here will be
    added to every run/task created after this call is made.

    Parameters
    ----------
    tags : list, optional
        Tags to add to every run/task, by default None
    sys_tags : list, optional
        System tags to add to every run/task, by default None
    """
    if tags:
        for user_tag in tags:
            self.sticky_tags.add(user_tag)
    if sys_tags:
        for system_tag in sys_tags:
            self.sticky_sys_tags.add(system_tag)
@classmethod
def get_object(cls, obj_type, sub_type, filters, attempt, *args):
    """Returns the requested object depending on obj_type and sub_type

    obj_type can be one of 'root', 'flow', 'run', 'step', 'task',
    or 'artifact'

    sub_type describes the aggregation required and can be either:
    'metadata', 'self' or any of obj_type provided that it is slotted below
    the object itself. For example, if obj_type is 'flow', you can
    specify 'run' to get all the runs in that flow.
    A few special rules:
        - 'metadata' is only allowed for obj_type 'task'
        - For obj_type 'artifact', only 'self' is allowed
    A few examples:
        - To get a list of all flows:
            - set obj_type to 'root' and sub_type to 'flow'
        - To get a list of all tasks:
            - set obj_type to 'root' and sub_type to 'task'
        - To get a list of all artifacts in a task:
            - set obj_type to 'task' and sub_type to 'artifact'
        - To get information about a specific flow:
            - set obj_type to 'flow' and sub_type to 'self'

    Parameters
    ----------
    obj_type : string
        One of 'root', 'flow', 'run', 'step', 'task' or 'artifact'
    sub_type : string
        Same as obj_type with the addition of 'metadata' and 'self'
    filters : dict
        Dictionary with keys 'any_tags', 'tags' and 'system_tags'. If specified
        will return only objects that have the specified tags present. Filters
        are ANDed together so all tags must be present for the object to be returned.
    attempt : int or None
        If None, for metadata and artifacts:
            - returns information about the latest attempt for artifacts
            - returns all metadata across all attempts
        Otherwise, returns information about metadata and artifacts for that
        attempt only.
        NOTE: For older versions of Metaflow (pre 2.4.0), the attempt for
        metadata is not known; in that case, all metadata is returned (as
        if None was passed in).

    Return
    ------
    object or list :
        Depending on the call, the type of object return varies

    Raises
    ------
    MetaflowInternalError
        If obj_type/sub_type are unknown or form a combination that is not allowed.
    ValueError
        If attempt is neither None nor convertible to a non-negative integer.
    """
    type_order = ObjectOrder.type_to_order(obj_type)
    sub_order = ObjectOrder.type_to_order(sub_type)

    if type_order is None:
        raise MetaflowInternalError(msg="Cannot find type %s" % obj_type)
    if type_order >= ObjectOrder.type_to_order("metadata"):
        raise MetaflowInternalError(msg="Type %s is not allowed" % obj_type)

    if sub_order is None:
        raise MetaflowInternalError(msg="Cannot find subtype %s" % sub_type)
    if type_order >= sub_order:
        raise MetaflowInternalError(
            msg="Subtype %s not allowed for %s" % (sub_type, obj_type)
        )

    # Metadata is always only at the task level
    if sub_type == "metadata" and obj_type != "task":
        raise MetaflowInternalError(
            msg="Metadata can only be retrieved at the task level"
        )

    if attempt is not None:
        try:
            attempt_int = int(attempt)
        except (TypeError, ValueError):
            # int() raises TypeError (not ValueError) for values such as
            # lists or dicts; report both the same way.
            raise ValueError("Attempt can only be a positive integer")
        if attempt_int < 0:
            raise ValueError("Attempt can only be a positive integer")
    else:
        attempt_int = None

    pre_filter = cls._get_object_internal(
        obj_type, type_order, sub_type, sub_order, filters, attempt_int, *args
    )
    if attempt_int is None or sub_type != "metadata":
        # If no attempt or not for metadata, just return as is
        return pre_filter
    return MetadataProvider._reconstruct_metadata_for_attempt(
        pre_filter, attempt_int
    )
@classmethod
def mutate_user_tags_for_run(
    cls, flow_id, run_id, tags_to_remove=None, tags_to_add=None
):
    """
    Mutate the set of user tags for a run.

    Removals logically get applied after additions.  Operations occur as a batch atomically.

    Parameters
    ----------
    flow_id : str
        Flow id, that the run belongs to.
    run_id: str
        Run id, together with flow_id, that identifies the specific Run whose tags to mutate
    tags_to_remove: iterable over str
        Iterable over tags to remove
    tags_to_add: iterable over str
        Iterable over tags to add

    Return
    ------
    Run tags after mutation operations

    Raises
    ------
    MetaflowTaggingError
        If there is nothing to add or remove, or if either argument is a
        string or is not iterable.
    """
    # perform common validation, across all provider implementations
    if tags_to_remove is None:
        tags_to_remove = []
    if tags_to_add is None:
        tags_to_add = []
    if not tags_to_add and not tags_to_remove:
        raise MetaflowTaggingError("Must add or remove at least one tag")

    if is_stringish(tags_to_add):
        raise MetaflowTaggingError("tags_to_add may not be a string")

    if is_stringish(tags_to_remove):
        raise MetaflowTaggingError("tags_to_remove may not be a string")

    def _is_iterable(something):
        try:
            iter(something)
            return True
        except TypeError:
            return False

    if not _is_iterable(tags_to_add):
        raise MetaflowTaggingError("tags_to_add must be iterable")
    if not _is_iterable(tags_to_remove):
        raise MetaflowTaggingError("tags_to_remove must be iterable")

    # Materialize both arguments exactly once. One-shot iterables (e.g.
    # generators) would otherwise be exhausted by the validation loop below
    # and reach the subclass implementation empty.
    tags_to_add = list(tags_to_add)
    tags_to_remove = list(tags_to_remove)
    # An empty generator is truthy, so repeat the emptiness check on the
    # materialized lists.
    if not tags_to_add and not tags_to_remove:
        raise MetaflowTaggingError("Must add or remove at least one tag")

    # check each tag is valid
    for tag in chain(tags_to_add, tags_to_remove):
        validate_tag(tag)

    # onto subclass implementation
    final_user_tags = cls._mutate_user_tags_for_run(
        flow_id, run_id, tags_to_add=tags_to_add, tags_to_remove=tags_to_remove
    )
    return final_user_tags
@classmethod
def _mutate_user_tags_for_run(
    cls, flow_id, run_id, tags_to_add=None, tags_to_remove=None
):
    """
    To be implemented by subclasses of MetadataProvider.

    See mutate_user_tags_for_run() for expectations; that method validates
    its arguments before delegating here. Note that the keyword order here
    (add, then remove) differs from the public method.

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError()
def _all_obj_elements(self, tags=None, sys_tags=None):
    """
    Return the fields shared by all object payloads for this provider's flow.

    Thin wrapper that forwards this provider's flow name to
    _all_obj_elements_static.
    """
    return MetadataProvider._all_obj_elements_static(
        self._flow_name, tags=tags, sys_tags=sys_tags
    )
@staticmethod
def _all_obj_elements_static(flow_name, tags=None, sys_tags=None):
    """
    Build the fields shared by run, step, task and artifact payloads.

    Returns a dict with the flow id, current username, user tags and system
    tags (each as a list, empty when not given) and the current time in
    milliseconds since the epoch.
    """
    payload = {"flow_id": flow_name}
    payload["user_name"] = get_username()
    payload["tags"] = list(tags) if tags else []
    payload["system_tags"] = list(sys_tags) if sys_tags else []
    payload["ts_epoch"] = int(round(time.time() * 1000))
    return payload
def _flow_to_json(self):
# No need to store tags, sys_tags or username at the flow level
# since runs are the top level logical concept, which is where we
# store tags, sys_tags and username
return {"flow_id": self._flow_name, "ts_epoch": int(round(time.time() * 1000))}
def _run_to_json(self, run_id=None, tags=None, sys_tags=None):
    """
    Return the payload describing a run of this provider's flow.

    Thin wrapper that forwards this provider's flow name to
    _run_to_json_static.
    """
    return MetadataProvider._run_to_json_static(
        self._flow_name, run_id=run_id, tags=tags, sys_tags=sys_tags
    )
@staticmethod
def _run_to_json_static(flow_name, run_id=None, tags=None, sys_tags=None):
    """
    Build the payload describing a run.

    "run_number" is only included when `run_id` is not None; the remaining
    fields come from _all_obj_elements_static.
    """
    payload = {} if run_id is None else {"run_number": run_id}
    common = MetadataProvider._all_obj_elements_static(flow_name, tags, sys_tags)
    payload.update(common)
    return payload
def _step_to_json(self, run_id, step_name, tags=None, sys_tags=None):
d = {"run_number": run_id, "step_name": step_name}
d.update(self._all_obj_elements(tags, sys_tags))
return d
def _task_to_json(self, run_id, step_name, task_id=None, tags=None, sys_tags=None):
d = {"run_number": run_id, "step_name": step_name}
if task_id is not None:
d["task_id"] = task_id
d.update(self._all_obj_elements(tags, sys_tags))
return d
def _object_to_json(
self,
obj_type,
run_id=None,
step_name=None,
task_id=None,
tags=None,
sys_tags=None,
):
if obj_type == "task":
return self._task_to_json(run_id, step_name, task_id, tags, sys_tags)
if obj_type == "step":
return self._step_to_json(run_id, step_name, tags, sys_tags)
if obj_type == "run":
return self._run_to_json(run_id, tags, sys_tags)
return self._flow_to_json()
def _artifacts_to_json(self, run_id, step_name, task_id, attempt_id, artifacts):
result = []
for art in artifacts:
d = {
"run_number": run_id,
"step_name": step_name,
"task_id": task_id,
"attempt_id": attempt_id,
"name": art.name,
"content_type": art.type,
"type": "metaflow.artifact",
"sha": art.sha,
"ds_type": art.ds_type,
"location": art.url if art.url else ":root:%s" % art.ds_root,
}
d.update(self._all_obj_elements(self.sticky_tags, self.sticky_sys_tags))
result.append(d)
return result
def _metadata_to_json(self, run_id, step_name, task_id, metadata):
    """
    Build one payload per metadata entry.

    The username is looked up once for the whole batch; the timestamp is
    taken per entry. Duplicate tags on an entry are collapsed.
    """
    user = get_username()
    payloads = []
    for datum in metadata:
        payloads.append(
            {
                "flow_id": self._flow_name,
                "run_number": run_id,
                "step_name": step_name,
                "task_id": task_id,
                "field_name": datum.field,
                "type": datum.type,
                "value": datum.value,
                "tags": list(set(datum.tags)) if datum.tags else [],
                "user_name": user,
                "ts_epoch": int(round(time.time() * 1000)),
            }
        )
    return payloads
def _get_system_info_as_dict(self):
    """This function drives:
    - sticky system tags initialization
    - task-level metadata generation

    Returns a dict with the runtime, Python version and resolved identity,
    plus the Metaflow / R versions when the environment reports them.
    """
    env = self._environment.get_environment_info()
    info = {
        "runtime": env["runtime"],
        "python_version": env["python_version_code"],
    }
    identity_type, identity_value = resolve_identity_as_tuple()
    info[identity_type] = identity_value
    if env["metaflow_version"]:
        info["metaflow_version"] = env["metaflow_version"]
    # (key in the environment info, key in the returned dict)
    optional_keys = (
        ("metaflow_r_version", "metaflow_r_version"),
        ("r_version_code", "r_version"),
    )
    for env_key, info_key in optional_keys:
        if env_key in env:
            info[info_key] = env[env_key]
    return info
def _get_git_info_as_dict(self):
    """
    Return the git related entries (repo url, branch, commit, dirty flag)
    that are present and truthy in the available environment info.
    """
    # NOTE: For flows executing remotely, we want to read from the INFO file of the code package that contains
    # information on the original environment that deployed the flow.
    # Otherwise git related info will be missing, as the repository is not part of the codepackage.
    from metaflow.packaging_sys import MetaflowCodeContent

    env = MetaflowCodeContent.get_info() or self._environment.get_environment_info()
    git_keys = (
        "repo_url",
        "branch_name",
        "commit_sha",
        "has_uncommitted_changes",
    )
    return {key: env[key] for key in git_keys if key in env and env[key]}
def _get_system_tags(self):
"""Convert system info dictionary into a list of system tags"""
return [
"{}:{}".format(k, v) for k, v in self._get_system_info_as_dict().items()
]
def _register_system_metadata(self, run_id, step_name, task_id, attempt):
    """Gather up system and code packaging info and register them as task metadata"""

    def _attempt_tags():
        # A fresh list for every entry, so entries never share a tags list
        return ["attempt_id:{0}".format(attempt)]

    # Take everything from system info and store them as metadata
    # field, and type could get long in theory...can the metadata backend handle it?
    # E.g. as of 5/9/2022 Metadata service's DB says VARCHAR(255).
    # It is likely overkill to fail a flow over an over-flow. We should expect the
    # backend to try to tolerate this (e.g. enlarge columns, truncation fallback).
    records = [
        MetaDatum(
            field=str(name),
            value=str(value),
            type=str(name),
            tags=_attempt_tags(),
        )
        for name, value in self._get_system_info_as_dict().items()
    ]

    # Also store code packaging information
    code_sha = os.environ.get("METAFLOW_CODE_SHA")
    if code_sha:
        code_url = os.environ.get("METAFLOW_CODE_URL")
        code_ds = os.environ.get("METAFLOW_CODE_DS")
        code_metadata = os.environ.get("METAFLOW_CODE_METADATA")
        package_description = json.dumps(
            {
                "ds_type": code_ds,
                "sha": code_sha,
                "location": code_url,
                "metadata": code_metadata,
            }
        )
        records.append(
            MetaDatum(
                field="code-package",
                value=package_description,
                type="code-package",
                tags=_attempt_tags(),
            )
        )

    # Add script name as metadata
    script_name = self._environment.get_environment_info()["script"]
    records.append(
        MetaDatum(
            field="script-name",
            value=script_name,
            type="script-name",
            tags=_attempt_tags(),
        )
    )

    # And add git metadata
    git_info = self._get_git_info_as_dict()
    if git_info:
        records.append(
            MetaDatum(
                field="git-info",
                value=json.dumps(git_info),
                type="git-info",
                tags=_attempt_tags(),
            )
        )

    if records:
        self.register_metadata(run_id, step_name, task_id, records)
@classmethod
def filter_tasks_by_metadata(
    cls,
    flow_name: str,
    run_id: str,
    step_name: str,
    field_name: str,
    pattern: str,
) -> List[str]:
    """
    Filter tasks by metadata field and pattern, returning task pathspecs that match criteria.

    Not implemented in this base class.

    Parameters
    ----------
    flow_name : str
        Flow name, that the run belongs to.
    run_id: str
        Run id, together with flow_name, that identifies the specific Run whose tasks to query
    step_name: str
        Step name to query tasks from
    field_name: str
        Metadata field name to query
    pattern: str
        Pattern to match in metadata field value

    Returns
    -------
    List[str]
        List of task pathspecs that satisfy the query

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    raise NotImplementedError()
@staticmethod
def _apply_filter(elts, filters):
if filters is None:
return elts
starting_point = elts
result = []
for key, value in filters.items():
if key == "any_tags":
for obj in starting_point:
if value in obj.get("tags", []) or value in obj.get(
"system_tags", []
):
result.append(obj)
if key == "tags":
for obj in starting_point:
if value in obj.get("tags", []):
result.append(obj)
if key == "system_tags":
for obj in starting_point:
if value in obj.get("system_tags", []):
result.append(obj)
starting_point = result
result = []
return starting_point
@staticmethod
def _reconstruct_metadata_for_attempt(all_metadata, attempt_id):
    """
    Return the metadata entries belonging to attempt `attempt_id`.

    When every entry carries an "attempt_id:<n>" tag, the entries tagged with
    the requested attempt are returned. Otherwise the attempt is delimited by
    time: entries whose timestamp falls between the start of the requested
    attempt (inclusive) and the start of the next one (exclusive) are
    returned, or an empty list if the requested attempt never started.
    """

    def _tagged_attempt(record):
        # Attempt number from the first attempt_id tag, None if there is none
        tags = record.get("tags")
        if tags is None:
            tags = []
        for tag in tags:
            found = attempt_id_re.match(tag)
            if found:
                return int(found.group(1))
        return None

    attempt_start_ts = {}
    tagged_matches = []
    every_record_tagged = True
    for record in all_metadata:
        if record["field_name"] == "attempt":
            attempt_start_ts[int(record["value"])] = record["ts_epoch"]
        tagged = _tagged_attempt(record)
        if tagged is None:
            # We didn't encounter a match for attempt_id
            every_record_tagged = False
        elif tagged == attempt_id:
            tagged_matches.append(record)

    if every_record_tagged:
        return tagged_matches

    # We reconstruct base on the attempts_start
    start_ts = attempt_start_ts.get(attempt_id, -1)
    if start_ts < 0:
        return []  # No metadata since the attempt hasn't started
    # Doubt we will be using Python in year 3000
    end_ts = attempt_start_ts.get(attempt_id + 1, 32503680000000)
    return [
        record
        for record in all_metadata
        if start_ts <= record["ts_epoch"] < end_ts
    ]
def __init__(self, environment, flow, event_logger, monitor):
    """
    Set up the state shared by all metadata providers.

    Parameters
    ----------
    environment :
        Object providing get_environment_info(); used to derive system info
    flow :
        Flow object; only its `name` is read here
    event_logger :
        Stored as-is for use by the provider
    monitor :
        Stored as-is for use by the provider
    """
    # NOTE(review): not used elsewhere in this class; presumably a counter
    # for subclasses that generate task IDs - confirm.
    self._task_id_seq = -1
    self.sticky_tags = set()
    self.sticky_sys_tags = set()
    self._flow_name = flow.name
    self._event_logger = event_logger
    self._monitor = monitor
    self._environment = environment
    self._runtime = os.environ.get("METAFLOW_RUNTIME_NAME", "dev")
    # Must come last: computing the system tags reads self._environment
    self.add_sticky_tags(sys_tags=self._get_system_tags())
================================================
FILE: metaflow/metadata_provider/util.py
================================================
from io import BytesIO
import os
import shutil
import tarfile
from metaflow import util
from metaflow.plugins.datastores.local_storage import LocalStorage
def copy_tree(src, dst, update=False):
    """
    Recursively copy the content of directory `src` into directory `dst`.

    Parameters
    ----------
    src : str
        Directory to copy from
    dst : str
        Directory to copy into; created (with parents) if missing
    update : bool, optional
        If True, a file is skipped when it already exists in `dst` and the
        source is not newer than it (by modification time), by default False
    """
    # exist_ok avoids failing when another process creates dst between a
    # separate existence check and the creation
    os.makedirs(dst, exist_ok=True)
    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            copy_tree(s, d, update)
        else:
            if (
                update
                and os.path.exists(d)
                and os.path.getmtime(s) <= os.path.getmtime(d)
            ):
                continue
            shutil.copy2(s, d)
def sync_local_metadata_to_datastore(metadata_local_dir, task_ds):
    """
    Pack `metadata_local_dir` into a gzipped tarball and store it through
    the parent datastore of `task_ds`.

    The key returned by the datastore is then recorded in the task's
    metadata under "local_metadata".
    """
    with util.TempDir() as td:
        tar_file_path = os.path.join(td, "metadata.tgz")
        # The archive is written to this in-memory buffer (passed as fileobj)
        buf = BytesIO()
        with tarfile.open(name=tar_file_path, mode="w:gz", fileobj=buf) as tar:
            tar.add(metadata_local_dir)
        blob = buf.getvalue()
        _, key = task_ds.parent_datastore.save_data([blob], len_hint=1)[0]
        task_ds._dangerous_save_metadata_post_done({"local_metadata": key})
def sync_local_metadata_from_datastore(metadata_local_dir, task_ds):
    """
    Fetch the tarball recorded under "local_metadata" for `task_ds`, extract
    it into a temporary directory and merge its `metadata_local_dir` subtree
    into the local datastore root.

    Existing local files are only overwritten by newer ones (copy_tree is
    called with update=True).
    """

    def echo_none(*args, **kwargs):
        # Silent stand-in for the echo callback expected by LocalStorage
        pass

    key_to_load = task_ds.load_metadata(["local_metadata"])["local_metadata"]
    _, tarball = next(task_ds.parent_datastore.load_data([key_to_load]))
    with util.TempDir() as td:
        with tarfile.open(fileobj=BytesIO(tarball), mode="r:gz") as tar:
            util.tar_safe_extract(tar, td)
        copy_tree(
            os.path.join(td, metadata_local_dir),
            LocalStorage.get_datastore_root_from_config(echo_none),
            update=True,
        )
================================================
FILE: metaflow/metaflow_config.py
================================================
import os
import sys
import types
import uuid
import datetime
from typing import Dict, List, Union, Tuple as TTuple
from metaflow.exception import MetaflowException
from metaflow.metaflow_config_funcs import from_conf, get_validate_choice_fn
# Recursive type alias for JSON, used by Runner API type mappings
JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]
# Disable multithreading security on MacOS
if sys.platform == "darwin":
os.environ["OBJC_DISABLE_INITIALIZE_FORK_SAFETY"] = "YES"
## NOTE: Just like Click's auto_envar_prefix `METAFLOW` (see in cli.py), all environment
## variables here are also named METAFLOW_XXX. So, for example, in the statement:
## `DEFAULT_DATASTORE = from_conf("DEFAULT_DATASTORE", "local")`, to override the default
## value, either set `METAFLOW_DEFAULT_DATASTORE` in your configuration file or set
## an environment variable called `METAFLOW_DEFAULT_DATASTORE`
##
# Constants (NOTE: these need to live before any from_conf)
##
# Path to the local directory to store artifacts for 'local' datastore.
DATASTORE_LOCAL_DIR = ".metaflow"
DATASTORE_SPIN_LOCAL_DIR = ".metaflow_spin"
# Local configuration file (in .metaflow) containing overrides per-project
LOCAL_CONFIG_FILE = "config.json"
###
# Default configuration
###
DEFAULT_DATASTORE = from_conf("DEFAULT_DATASTORE", "local")
DEFAULT_ENVIRONMENT = from_conf("DEFAULT_ENVIRONMENT", "local")
DEFAULT_EVENT_LOGGER = from_conf("DEFAULT_EVENT_LOGGER", "nullSidecarLogger")
DEFAULT_METADATA = from_conf("DEFAULT_METADATA", "local")
DEFAULT_MONITOR = from_conf("DEFAULT_MONITOR", "nullSidecarMonitor")
DEFAULT_PACKAGE_SUFFIXES = from_conf("DEFAULT_PACKAGE_SUFFIXES", ".py,.R,.RDS")
DEFAULT_AWS_CLIENT_PROVIDER = from_conf("DEFAULT_AWS_CLIENT_PROVIDER", "boto3")
DEFAULT_AZURE_CLIENT_PROVIDER = from_conf(
"DEFAULT_AZURE_CLIENT_PROVIDER", "azure-default"
)
DEFAULT_GCP_CLIENT_PROVIDER = from_conf("DEFAULT_GCP_CLIENT_PROVIDER", "gcp-default")
DEFAULT_SECRETS_BACKEND_TYPE = from_conf("DEFAULT_SECRETS_BACKEND_TYPE")
DEFAULT_SECRETS_ROLE = from_conf("DEFAULT_SECRETS_ROLE")
DEFAULT_FROM_DEPLOYMENT_IMPL = from_conf(
"DEFAULT_FROM_DEPLOYMENT_IMPL", "argo-workflows"
)
###
# Spin configuration
###
# Essentially a whitelist of decorators that are allowed in Spin steps
SPIN_ALLOWED_DECORATORS = from_conf(
"SPIN_ALLOWED_DECORATORS",
[
"conda",
"pypi",
"conda_base",
"pypi_base",
"environment",
"project",
"timeout",
"conda_env_internal",
"card",
],
)
# Essentially a blacklist of decorators that are not allowed in Spin steps
# Note: decorators not in either SPIN_ALLOWED_DECORATORS or SPIN_DISALLOWED_DECORATORS
# are simply ignored in Spin steps
SPIN_DISALLOWED_DECORATORS = from_conf(
"SPIN_DISALLOWED_DECORATORS",
[
"parallel",
],
)
# Default value for persist option in spin command
SPIN_PERSIST = from_conf("SPIN_PERSIST", False)
###
# User configuration
###
USER = from_conf("USER")
###
# Datastore configuration
###
DATASTORE_SYSROOT_LOCAL = from_conf("DATASTORE_SYSROOT_LOCAL")
DATASTORE_SYSROOT_SPIN = from_conf("DATASTORE_SYSROOT_SPIN")
# S3 bucket and prefix to store artifacts for 's3' datastore.
DATASTORE_SYSROOT_S3 = from_conf("DATASTORE_SYSROOT_S3")
# Azure Blob Storage container and blob prefix
DATASTORE_SYSROOT_AZURE = from_conf("DATASTORE_SYSROOT_AZURE")
# GS bucket and prefix to store artifacts for 'gs' datastore
DATASTORE_SYSROOT_GS = from_conf("DATASTORE_SYSROOT_GS")
###
# Datastore local cache
###
# Path to the client cache
CLIENT_CACHE_PATH = from_conf("CLIENT_CACHE_PATH", "/tmp/metaflow_client")
# Maximum size (in bytes) of the cache
CLIENT_CACHE_MAX_SIZE = from_conf("CLIENT_CACHE_MAX_SIZE", 10000)
# Maximum number of cached Flow and TaskDatastores in the cache
CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT = from_conf(
"CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT", 50
)
CLIENT_CACHE_MAX_TASKDATASTORE_COUNT = from_conf(
"CLIENT_CACHE_MAX_TASKDATASTORE_COUNT", CLIENT_CACHE_MAX_FLOWDATASTORE_COUNT * 100
)
###
# Datatools (S3) configuration
###
S3_ENDPOINT_URL = from_conf("S3_ENDPOINT_URL")
S3_VERIFY_CERTIFICATE = from_conf("S3_VERIFY_CERTIFICATE")
# Set ServerSideEncryption for S3 uploads
S3_SERVER_SIDE_ENCRYPTION = from_conf("S3_SERVER_SIDE_ENCRYPTION")
# S3 retry configuration
# This is useful if you want to "fail fast" on S3 operations; use with caution
# though as this may increase failures. Note that this is the number of *retries*
# so setting it to 0 means each operation will be tried once.
S3_RETRY_COUNT = from_conf("S3_RETRY_COUNT", 7)
# Number of concurrent S3 processes for parallel operations.
S3_WORKER_COUNT = from_conf("S3_WORKER_COUNT", 64)
# Number of retries on *transient* failures (such as SlowDown errors). Note
# that if after S3_TRANSIENT_RETRY_COUNT times, all operations haven't been done,
# it will try up to S3_RETRY_COUNT again so the total number of tries can be up to
# (S3_RETRY_COUNT + 1) * (S3_TRANSIENT_RETRY_COUNT + 1)
# You typically want this number fairly high as transient retries are "cheap" (only
# operations that have not succeeded retry as opposed to all operations for the
# top-level retries)
S3_TRANSIENT_RETRY_COUNT = from_conf("S3_TRANSIENT_RETRY_COUNT", 20)
# Whether to log transient retry messages to stdout
S3_LOG_TRANSIENT_RETRIES = from_conf("S3_LOG_TRANSIENT_RETRIES", False)
# S3 retry configuration used in the aws client
# Use the adaptive retry strategy by default
S3_CLIENT_RETRY_CONFIG = from_conf(
"S3_CLIENT_RETRY_CONFIG", {"max_attempts": 10, "mode": "adaptive"}
)
# Threshold to start printing warnings for an AWS retry
RETRY_WARNING_THRESHOLD = 3
# S3 datatools root location
DATATOOLS_SUFFIX = from_conf("DATATOOLS_SUFFIX", "data")
DATATOOLS_S3ROOT = from_conf(
"DATATOOLS_S3ROOT",
(
os.path.join(DATASTORE_SYSROOT_S3, DATATOOLS_SUFFIX)
if DATASTORE_SYSROOT_S3
else None
),
)
TEMPDIR = from_conf("TEMPDIR", ".")
DATATOOLS_CLIENT_PARAMS = from_conf("DATATOOLS_CLIENT_PARAMS", {})
if S3_ENDPOINT_URL:
DATATOOLS_CLIENT_PARAMS["endpoint_url"] = S3_ENDPOINT_URL
if S3_VERIFY_CERTIFICATE:
DATATOOLS_CLIENT_PARAMS["verify"] = S3_VERIFY_CERTIFICATE
DATATOOLS_SESSION_VARS = from_conf("DATATOOLS_SESSION_VARS", {})
# Azure datatools root location
# Note: we do not expose an actual datatools library for Azure (like we do for S3)
# Similar to DATATOOLS_LOCALROOT, this is used ONLY by the IncludeFile's internal implementation.
DATATOOLS_AZUREROOT = from_conf(
"DATATOOLS_AZUREROOT",
(
os.path.join(DATASTORE_SYSROOT_AZURE, DATATOOLS_SUFFIX)
if DATASTORE_SYSROOT_AZURE
else None
),
)
# GS datatools root location
# Note: we do not expose an actual datatools library for GS (like we do for S3)
# Similar to DATATOOLS_LOCALROOT, this is used ONLY by the IncludeFile's internal implementation.
DATATOOLS_GSROOT = from_conf(
"DATATOOLS_GSROOT",
(
os.path.join(DATASTORE_SYSROOT_GS, DATATOOLS_SUFFIX)
if DATASTORE_SYSROOT_GS
else None
),
)
# Local datatools root location
DATATOOLS_LOCALROOT = from_conf(
"DATATOOLS_LOCALROOT",
(
os.path.join(DATASTORE_SYSROOT_LOCAL, DATATOOLS_SUFFIX)
if DATASTORE_SYSROOT_LOCAL
else None
),
)
# Secrets Backend - AWS Secrets Manager configuration
AWS_SECRETS_MANAGER_DEFAULT_REGION = from_conf("AWS_SECRETS_MANAGER_DEFAULT_REGION")
AWS_SECRETS_MANAGER_DEFAULT_ROLE = from_conf("AWS_SECRETS_MANAGER_DEFAULT_ROLE")
# Secrets Backend - GCP Secrets name prefix. With this, users don't have
# to specify the full secret name in the @secret decorator.
#
# Note that it makes a difference whether the prefix ends with a slash or not
# E.g. if secret name passed to @secret decorator is mysecret:
# - "projects/1234567890/secrets/" -> "projects/1234567890/secrets/mysecret"
# - "projects/1234567890/secrets/foo-" -> "projects/1234567890/secrets/foo-mysecret"
GCP_SECRET_MANAGER_PREFIX = from_conf("GCP_SECRET_MANAGER_PREFIX")
# Secrets Backend - Azure Key Vault prefix. With this, users don't have to
# specify the full https:// vault url in the @secret decorator.
#
# It does not make a difference if the prefix ends in a / or not. We will handle either
# case correctly.
AZURE_KEY_VAULT_PREFIX = from_conf("AZURE_KEY_VAULT_PREFIX")
# The root directory to save artifact pulls in, when using S3 or Azure
ARTIFACT_LOCALROOT = from_conf("ARTIFACT_LOCALROOT", os.getcwd())
# Cards related config variables
CARD_SUFFIX = "mf.cards"
CARD_LOCALROOT = from_conf("CARD_LOCALROOT")
CARD_S3ROOT = from_conf(
"CARD_S3ROOT",
os.path.join(DATASTORE_SYSROOT_S3, CARD_SUFFIX) if DATASTORE_SYSROOT_S3 else None,
)
CARD_AZUREROOT = from_conf(
"CARD_AZUREROOT",
(
os.path.join(DATASTORE_SYSROOT_AZURE, CARD_SUFFIX)
if DATASTORE_SYSROOT_AZURE
else None
),
)
CARD_GSROOT = from_conf(
"CARD_GSROOT",
os.path.join(DATASTORE_SYSROOT_GS, CARD_SUFFIX) if DATASTORE_SYSROOT_GS else None,
)
CARD_NO_WARNING = from_conf("CARD_NO_WARNING", False)
RUNTIME_CARD_RENDER_INTERVAL = from_conf("RUNTIME_CARD_RENDER_INTERVAL", 60)
# Azure storage account URL
AZURE_STORAGE_BLOB_SERVICE_ENDPOINT = from_conf("AZURE_STORAGE_BLOB_SERVICE_ENDPOINT")
# Azure storage can use process-based parallelism instead of threads.
# Processes perform better for high throughput workloads (e.g. many huge artifacts)
AZURE_STORAGE_WORKLOAD_TYPE = from_conf(
"AZURE_STORAGE_WORKLOAD_TYPE",
default="general",
validate_fn=get_validate_choice_fn(["general", "high_throughput"]),
)
# GS storage can use process-based parallelism instead of threads.
# Processes perform better for high throughput workloads (e.g. many huge artifacts)
GS_STORAGE_WORKLOAD_TYPE = from_conf(
"GS_STORAGE_WORKLOAD_TYPE",
"general",
validate_fn=get_validate_choice_fn(["general", "high_throughput"]),
)
###
# Metadata configuration
###
SERVICE_URL = from_conf("SERVICE_URL")
SERVICE_RETRY_COUNT = from_conf("SERVICE_RETRY_COUNT", 5)
SERVICE_AUTH_KEY = from_conf("SERVICE_AUTH_KEY")
SERVICE_HEADERS = from_conf("SERVICE_HEADERS", {})
if SERVICE_AUTH_KEY is not None:
SERVICE_HEADERS["x-api-key"] = SERVICE_AUTH_KEY
# Checks version compatibility with Metadata service
SERVICE_VERSION_CHECK = from_conf("SERVICE_VERSION_CHECK", True)
# Default container image
DEFAULT_CONTAINER_IMAGE = from_conf("DEFAULT_CONTAINER_IMAGE")
# Default container registry
DEFAULT_CONTAINER_REGISTRY = from_conf("DEFAULT_CONTAINER_REGISTRY")
# Controls whether to include foreach stack information in metadata.
INCLUDE_FOREACH_STACK = from_conf("INCLUDE_FOREACH_STACK", True)
# Maximum length of the foreach value string to be stored in each ForeachFrame.
MAXIMUM_FOREACH_VALUE_CHARS = from_conf("MAXIMUM_FOREACH_VALUE_CHARS", 30)
# The default runtime limit (In seconds) of jobs launched by any compute provider. Default of 5 days.
DEFAULT_RUNTIME_LIMIT = from_conf("DEFAULT_RUNTIME_LIMIT", 5 * 24 * 60 * 60)
###
# Organization customizations
###
UI_URL = from_conf("UI_URL")
###
# Capture error logs from argo
###
ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT = from_conf("ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT")
# Contact information displayed when running the `metaflow` command.
# Value should be a dictionary where:
# - key is a string describing contact method
# - value is a string describing contact itself (email, web address, etc.)
# The default value shows an example of this
CONTACT_INFO = from_conf(
"CONTACT_INFO",
{
"Read the documentation": "http://docs.metaflow.org",
"Chat with us": "http://chat.metaflow.org",
"Get help by email": "help@metaflow.org",
},
)
###
# Decorators
###
# Format is a space separated string of decospecs (what is passed
# using --with)
DEFAULT_DECOSPECS = from_conf("DEFAULT_DECOSPECS", "")
###
# AWS Batch configuration
###
# IAM role for AWS Batch container with Amazon S3 access
# (and AWS DynamoDb access for AWS StepFunctions, if enabled)
ECS_S3_ACCESS_IAM_ROLE = from_conf("ECS_S3_ACCESS_IAM_ROLE")
# IAM role for AWS Batch container for AWS Fargate
ECS_FARGATE_EXECUTION_ROLE = from_conf("ECS_FARGATE_EXECUTION_ROLE")
# Job queue for AWS Batch
BATCH_JOB_QUEUE = from_conf("BATCH_JOB_QUEUE")
# Default container image for AWS Batch
BATCH_CONTAINER_IMAGE = from_conf("BATCH_CONTAINER_IMAGE", DEFAULT_CONTAINER_IMAGE)
# Default container registry for AWS Batch
BATCH_CONTAINER_REGISTRY = from_conf(
"BATCH_CONTAINER_REGISTRY", DEFAULT_CONTAINER_REGISTRY
)
# Metadata service URL for AWS Batch
SERVICE_INTERNAL_URL = from_conf("SERVICE_INTERNAL_URL", SERVICE_URL)
# Assign resource tags to AWS Batch jobs. Set to False by default since
# it requires `Batch:TagResource` permissions which may not be available
# in all Metaflow deployments. Hopefully, some day we can flip the
# default to True.
BATCH_EMIT_TAGS = from_conf("BATCH_EMIT_TAGS", False)
# Default tags to add to AWS Batch jobs. These are in addition to the defaults set when BATCH_EMIT_TAGS is true.
BATCH_DEFAULT_TAGS = from_conf("BATCH_DEFAULT_TAGS", {})
###
# AWS Step Functions configuration
###
# IAM role for AWS Step Functions with AWS Batch and AWS DynamoDb access
# https://docs.aws.amazon.com/step-functions/latest/dg/batch-iam.html
SFN_IAM_ROLE = from_conf("SFN_IAM_ROLE")
# AWS DynamoDb Table name (with partition key - `pathspec` of type string)
SFN_DYNAMO_DB_TABLE = from_conf("SFN_DYNAMO_DB_TABLE")
# IAM role for AWS Events with AWS Step Functions access
# https://docs.aws.amazon.com/eventbridge/latest/userguide/auth-and-access-control-eventbridge.html
EVENTS_SFN_ACCESS_IAM_ROLE = from_conf("EVENTS_SFN_ACCESS_IAM_ROLE")
# Prefix for AWS Step Functions state machines. Set to stack name for Metaflow
# sandbox.
SFN_STATE_MACHINE_PREFIX = from_conf("SFN_STATE_MACHINE_PREFIX")
# Optional AWS CloudWatch Log Group ARN for emitting AWS Step Functions state
# machine execution logs. This needs to be available when using the
# `step-functions create --log-execution-history` command.
SFN_EXECUTION_LOG_GROUP_ARN = from_conf("SFN_EXECUTION_LOG_GROUP_ARN")
# Amazon S3 path for storing the results of AWS Step Functions Distributed Map
SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH = from_conf(
"SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH",
(
os.path.join(DATASTORE_SYSROOT_S3, "sfn_distributed_map_output")
if DATASTORE_SYSROOT_S3
else None
),
)
# Toggle for step command being part of the Step Function payload, or if it should be offloaded to S3
SFN_COMPRESS_STATE_MACHINE = from_conf("SFN_COMPRESS_STATE_MACHINE", False)
###
# Kubernetes configuration
###
# Kubernetes namespace to use for all objects created by Metaflow
KUBERNETES_NAMESPACE = from_conf("KUBERNETES_NAMESPACE", "default")
# Default service account to use by K8S jobs created by Metaflow
KUBERNETES_SERVICE_ACCOUNT = from_conf("KUBERNETES_SERVICE_ACCOUNT")
# Default node selectors to use by K8S jobs created by Metaflow - foo=bar,baz=bab
KUBERNETES_NODE_SELECTOR = from_conf("KUBERNETES_NODE_SELECTOR", "")
KUBERNETES_TOLERATIONS = from_conf("KUBERNETES_TOLERATIONS", "")
KUBERNETES_PERSISTENT_VOLUME_CLAIMS = from_conf(
"KUBERNETES_PERSISTENT_VOLUME_CLAIMS", ""
)
KUBERNETES_SECRETS = from_conf("KUBERNETES_SECRETS", "")
# Default labels for kubernetes pods
KUBERNETES_LABELS = from_conf("KUBERNETES_LABELS", "")
# Default annotations for kubernetes pods
KUBERNETES_ANNOTATIONS = from_conf("KUBERNETES_ANNOTATIONS", "")
# Default GPU vendor to use by K8S jobs created by Metaflow (supports nvidia, amd)
KUBERNETES_GPU_VENDOR = from_conf("KUBERNETES_GPU_VENDOR", "nvidia")
# Default container image for K8S
KUBERNETES_CONTAINER_IMAGE = from_conf(
"KUBERNETES_CONTAINER_IMAGE", DEFAULT_CONTAINER_IMAGE
)
# Image pull policy for container images
KUBERNETES_IMAGE_PULL_POLICY = from_conf("KUBERNETES_IMAGE_PULL_POLICY", None)
# Image pull secrets for container images
KUBERNETES_IMAGE_PULL_SECRETS = from_conf("KUBERNETES_IMAGE_PULL_SECRETS", "")
# Default container registry for K8S
KUBERNETES_CONTAINER_REGISTRY = from_conf(
"KUBERNETES_CONTAINER_REGISTRY", DEFAULT_CONTAINER_REGISTRY
)
# Toggle for trying to fetch EC2 instance metadata
KUBERNETES_FETCH_EC2_METADATA = from_conf("KUBERNETES_FETCH_EC2_METADATA", False)
# Shared memory in MB to use for this step
KUBERNETES_SHARED_MEMORY = from_conf("KUBERNETES_SHARED_MEMORY", None)
# Default port number to open on the pods
KUBERNETES_PORT = from_conf("KUBERNETES_PORT", None)
# Default kubernetes resource requests for CPU, memory and disk
KUBERNETES_CPU = from_conf("KUBERNETES_CPU", None)
KUBERNETES_MEMORY = from_conf("KUBERNETES_MEMORY", None)
KUBERNETES_DISK = from_conf("KUBERNETES_DISK", None)
# Default kubernetes QoS class
KUBERNETES_QOS = from_conf("KUBERNETES_QOS", "burstable")
# Architecture of kubernetes nodes - used for @conda/@pypi in metaflow-dev
KUBERNETES_CONDA_ARCH = from_conf("KUBERNETES_CONDA_ARCH")
ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")
KUBERNETES_JOBSET_GROUP = from_conf("KUBERNETES_JOBSET_GROUP", "jobset.x-k8s.io")
KUBERNETES_JOBSET_VERSION = from_conf("KUBERNETES_JOBSET_VERSION", "v1alpha2")
KUBERNETES_JOB_TERMINATE_MODE = from_conf("KUBERNETES_JOB_TERMINATE_MODE", "stop")
##
# Argo Events Configuration
##
ARGO_EVENTS_SERVICE_ACCOUNT = from_conf("ARGO_EVENTS_SERVICE_ACCOUNT")
ARGO_EVENTS_EVENT_BUS = from_conf("ARGO_EVENTS_EVENT_BUS", "default")
ARGO_EVENTS_EVENT_SOURCE = from_conf("ARGO_EVENTS_EVENT_SOURCE")
ARGO_EVENTS_EVENT = from_conf("ARGO_EVENTS_EVENT")
ARGO_EVENTS_WEBHOOK_URL = from_conf("ARGO_EVENTS_WEBHOOK_URL")
ARGO_EVENTS_INTERNAL_WEBHOOK_URL = from_conf(
"ARGO_EVENTS_INTERNAL_WEBHOOK_URL", ARGO_EVENTS_WEBHOOK_URL
)
ARGO_EVENTS_WEBHOOK_AUTH = from_conf("ARGO_EVENTS_WEBHOOK_AUTH", "none")
ARGO_EVENTS_SENSOR_NAMESPACE = from_conf(
"ARGO_EVENTS_SENSOR_NAMESPACE", KUBERNETES_NAMESPACE
)
# Prefix for namespaced events (used by @trigger with namespaced=True)
NAMESPACED_EVENTS_PREFIX = from_conf("NAMESPACED_EVENTS_PREFIX", "mfns")
ARGO_WORKFLOWS_UI_URL = from_conf("ARGO_WORKFLOWS_UI_URL")
##
# Airflow Configuration
##
# This configuration sets `startup_timeout_seconds` in airflow's KubernetesPodOperator.
AIRFLOW_KUBERNETES_STARTUP_TIMEOUT_SECONDS = from_conf(
"AIRFLOW_KUBERNETES_STARTUP_TIMEOUT_SECONDS", 60 * 60
)
# This configuration sets `kubernetes_conn_id` in airflow's KubernetesPodOperator.
AIRFLOW_KUBERNETES_CONN_ID = from_conf("AIRFLOW_KUBERNETES_CONN_ID")
AIRFLOW_KUBERNETES_KUBECONFIG_FILE = from_conf("AIRFLOW_KUBERNETES_KUBECONFIG_FILE")
AIRFLOW_KUBERNETES_KUBECONFIG_CONTEXT = from_conf(
"AIRFLOW_KUBERNETES_KUBECONFIG_CONTEXT"
)
###
# Conda configuration
###
# Conda package root location on S3
CONDA_PACKAGE_S3ROOT = from_conf("CONDA_PACKAGE_S3ROOT")
# Conda package root location on Azure
CONDA_PACKAGE_AZUREROOT = from_conf("CONDA_PACKAGE_AZUREROOT")
# Conda package root location on GS
CONDA_PACKAGE_GSROOT = from_conf("CONDA_PACKAGE_GSROOT")
# Use an alternate dependency resolver for conda packages instead of conda
# Mamba promises faster package dependency resolution times, which
# should result in an appreciable speedup in flow environment initialization.
CONDA_DEPENDENCY_RESOLVER = from_conf("CONDA_DEPENDENCY_RESOLVER", "conda")
# Default to not using fast init binary.
CONDA_USE_FAST_INIT = from_conf("CONDA_USE_FAST_INIT", False)
###
# Escape hatch configuration
###
# Print out warning if escape hatch is not used for the target packages
ESCAPE_HATCH_WARNING = from_conf("ESCAPE_HATCH_WARNING", True)
###
# Features
###
FEAT_ALWAYS_UPLOAD_CODE_PACKAGE = from_conf("FEAT_ALWAYS_UPLOAD_CODE_PACKAGE", False)
###
# Profile
###
PROFILE_FROM_START = from_conf("PROFILE_FROM_START", False)
###
# Debug configuration
###
# Names of the available debug categories.
DEBUG_OPTIONS = [
    "subcommand",
    "sidecar",
    "s3client",
    "tracing",
    "stubgen",
    "userconf",
    "conda",
    "package",
]

# Define one module-level flag DEBUG_<OPTION> per category (for example
# DEBUG_SUBCOMMAND). Each flag defaults to False and, like every from_conf
# value, can be overridden through METAFLOW_DEBUG_<OPTION>.
for typ in DEBUG_OPTIONS:
    # At module level vars() is the module namespace, so this creates a global.
    vars()["DEBUG_%s" % typ.upper()] = from_conf("DEBUG_%s" % typ.upper(), False)
###
# Plugin configuration
###
# Plugin configuration variables exist in plugins/__init__.py.
# Specifically, there is an ENABLED_<CATEGORY> configuration value to determine
# the set of plugins to enable. The categories are: step_decorator, flow_decorator,
# environment, metadata_provider, datastore, sidecar, logging_sidecar, monitor_sidecar,
# aws_client_provider, and cli. If not set (the default), all plugins are enabled.
# You can restrict which plugins are enabled by listing them explicitly, for example
# ENABLED_STEP_DECORATOR = ["batch", "resources"] will enable only those two step
# decorators and none other.
###
# Command configuration
###
# Command (i.e., `metaflow <command>`) configuration variable ENABLED_CMD
# exists in cmd/main_cli.py. It behaves just like any of the other ENABLED_
# configuration variables.
###
# AWS Sandbox configuration
###
# Boolean flag for metaflow AWS sandbox access
AWS_SANDBOX_ENABLED = from_conf("AWS_SANDBOX_ENABLED", False)
# Metaflow AWS sandbox auth endpoint
AWS_SANDBOX_STS_ENDPOINT_URL = SERVICE_URL
# Metaflow AWS sandbox API auth key
AWS_SANDBOX_API_KEY = from_conf("AWS_SANDBOX_API_KEY")
# Internal Metadata URL
AWS_SANDBOX_INTERNAL_SERVICE_URL = from_conf("AWS_SANDBOX_INTERNAL_SERVICE_URL")
# AWS region
AWS_SANDBOX_REGION = from_conf("AWS_SANDBOX_REGION")
# Finalize configuration: when the AWS sandbox is enabled, its settings
# override the regular service URL, API key and state machine prefix.
if AWS_SANDBOX_ENABLED:
    # os.environ only accepts strings; assigning None (AWS_SANDBOX_REGION not
    # configured) would raise a TypeError while this module is imported, so
    # the region is only exported when one is set.
    if AWS_SANDBOX_REGION is not None:
        os.environ["AWS_DEFAULT_REGION"] = AWS_SANDBOX_REGION
    SERVICE_INTERNAL_URL = AWS_SANDBOX_INTERNAL_SERVICE_URL
    SERVICE_HEADERS["x-api-key"] = AWS_SANDBOX_API_KEY
    SFN_STATE_MACHINE_PREFIX = from_conf("AWS_SANDBOX_STACK_NAME")
KUBERNETES_SANDBOX_INIT_SCRIPT = from_conf("KUBERNETES_SANDBOX_INIT_SCRIPT")
OTEL_ENDPOINT = from_conf("OTEL_ENDPOINT")
ZIPKIN_ENDPOINT = from_conf("ZIPKIN_ENDPOINT")
CONSOLE_TRACE_ENABLED = from_conf("CONSOLE_TRACE_ENABLED", False)
# internal env used for preventing the tracing module from loading during Conda bootstrapping.
DISABLE_TRACING = bool(os.environ.get("DISABLE_TRACING", False))
# MAX_ATTEMPTS is the maximum number of attempts, including the first
# task, retries, and the final fallback task and its retries.
#
# Datastore needs to check all attempt files to find the latest one, so
# increasing this limit has real performance implications for all tasks.
# Decreasing this limit is very unsafe, as it can lead to wrong results
# being read from old tasks.
#
# Note also that DataStoreSet resolves the latest attempt_id using
# lexicographic ordering of attempts. This won't work if MAX_ATTEMPTS > 99.
MAX_ATTEMPTS = 6
# Feature flag (experimental features that are *explicitly* unsupported)
# Process configs even when using the click_api for Runner/Deployer
CLICK_API_PROCESS_CONFIG = from_conf("CLICK_API_PROCESS_CONFIG", True)
# PINNED_CONDA_LIBS are the libraries that metaflow depends on for execution
# and are needed within a conda environment
def get_pinned_conda_libs(python_version, datastore_type):
    """
    Return the version pins Metaflow needs inside a conda environment.

    Parameters
    ----------
    python_version : Any
        Accepted for interface compatibility; not used to compute the pins.
    datastore_type : str
        One of "s3", "azure", "gs" or "local".

    Returns
    -------
    dict
        Package name -> version specifier; always contains "requests".

    Raises
    ------
    MetaflowException
        If `datastore_type` is not one of the known datastores.
    """
    # Extra pins required by each datastore, on top of the common ones.
    extras_by_datastore = (
        ("s3", {"boto3": ">=1.14.0"}),
        (
            "azure",
            {
                "azure-identity": ">=1.10.0",
                "azure-storage-blob": ">=12.12.0",
                "azure-keyvault-secrets": ">=4.7.0",
                "simple-azure-blob-downloader": ">=0.1.0",
            },
        ),
        (
            "gs",
            {
                "google-cloud-storage": ">=2.5.0",
                "google-auth": ">=2.11.0",
                "google-cloud-secret-manager": ">=2.10.0",
                "simple-gcp-object-downloader": ">=0.1.0",
                "packaging": ">=24.0",
            },
        ),
        ("local", {}),
    )
    pins = {"requests": ">=2.21.0"}
    for known_type, extra_pins in extras_by_datastore:
        if datastore_type == known_type:
            pins.update(extra_pins)
            return pins
    raise MetaflowException(
        msg="conda lib pins for datastore %s are undefined" % (datastore_type,)
    )
###
# Runner API type mappings
# Extensions can add custom Click parameter types via get_click_to_python_types
###
def get_click_to_python_types():
    """
    Returns the mapping from Click parameter types to Python types for Runner API.

    Extensions can define a function with the same name in their config module;
    the extension-loading code at the end of this module merges the extension's
    mapping on top of this one.

    Returns
    -------
    dict
        Maps each Click parameter type class to the Python type (or typing
        construct) used to represent it.
    """
    # Imports are local to avoid circular dependencies:
    # metaflow_config -> includefile -> plugins -> ... -> config_options -> debug -> metaflow_config
    from metaflow._vendor.click.types import (
        BoolParamType,
        Choice,
        DateTime,
        File,
        FloatParamType,
        IntParamType,
        Path,
        StringParamType,
        Tuple,
        UUIDParameterType,
    )
    from metaflow.parameters import JSONTypeClass
    from metaflow.includefile import FilePathClass
    from metaflow.user_configs.config_options import (
        LocalFileInput,
        MultipleTuple,
        ConfigValue,
    )

    return {
        StringParamType: str,
        IntParamType: int,
        FloatParamType: float,
        BoolParamType: bool,
        UUIDParameterType: uuid.UUID,
        Path: str,
        DateTime: datetime.datetime,
        Tuple: tuple,
        Choice: str,
        File: str,
        JSONTypeClass: JSON,
        FilePathClass: str,
        LocalFileInput: str,
        MultipleTuple: TTuple[str, Union[JSON, ConfigValue]],
    }
# Check if there are extensions to Metaflow to load and override everything
#
# Every name found in an extension's config module is copied into this
# module's globals, except for a few names that are merged instead:
#   - DEBUG_OPTIONS: appended, and a DEBUG_<OPTION> flag is created for each
#   - get_pinned_conda_libs: pins from both functions are combined
#   - TOGGLE_DECOSPECS: accumulated and used as DEFAULT_DECOSPECS fallback
#   - get_click_to_python_types: the extension's mapping updates the base one
try:
    from metaflow.extension_support import get_modules

    _TOGGLE_DECOSPECS = []

    ext_modules = get_modules("config")
    for m in ext_modules:
        # We load into globals whatever we have in extension_module
        # We specifically exclude any modules that may be included (like sys, os, etc)
        for n, o in m.module.__dict__.items():
            if n == "DEBUG_OPTIONS":
                DEBUG_OPTIONS.extend(o)
                for typ in o:
                    vars()["DEBUG_%s" % typ.upper()] = from_conf(
                        "DEBUG_%s" % typ.upper(), False
                    )
            elif n == "get_pinned_conda_libs":

                # f1/f2 are bound as defaults so that each wrapper keeps the
                # functions that were current when it was created.
                def _new_get_pinned_conda_libs(
                    python_version, datastore_type, f1=globals()[n], f2=o
                ):
                    d1 = f1(python_version, datastore_type)
                    d2 = f2(python_version, datastore_type)
                    for k, v in d2.items():
                        # Pins for the same package are joined with ","
                        d1[k] = v if k not in d1 else ",".join([d1[k], v])
                    return d1

                globals()[n] = _new_get_pinned_conda_libs
            elif n == "TOGGLE_DECOSPECS":
                if any([x.startswith("-") for x in o]):
                    raise ValueError("Removing decospecs is not currently supported")
                if any(" " in x for x in o):
                    raise ValueError("Decospecs cannot contain spaces")
                _TOGGLE_DECOSPECS.extend(o)
            elif n == "get_click_to_python_types":
                # Extension provides additional Click type mappings for Runner API
                # Merge extension's types with base types
                def _new_get_click_to_python_types(f1=globals()[n], f2=o):
                    d1 = f1()
                    d2 = f2()
                    d1.update(d2)
                    return d1

                globals()[n] = _new_get_click_to_python_types
            elif not n.startswith("__") and not isinstance(o, types.ModuleType):
                globals()[n] = o
    # If DEFAULT_DECOSPECS is set, use that, else extrapolate from extensions
    if not DEFAULT_DECOSPECS:
        DEFAULT_DECOSPECS = " ".join(_TOGGLE_DECOSPECS)
finally:
    # Erase all temporary names to avoid leaking things
    for _n in [
        "m",
        "n",
        "o",
        "typ",
        "ext_modules",
        "get_modules",
        "_new_get_pinned_conda_libs",
        # Same treatment as _new_get_pinned_conda_libs: the wrapper stays
        # reachable under its public name, only the temporary name is removed.
        "_new_get_click_to_python_types",
        "d1",
        "d2",
        "k",
        "v",
        "f1",
        "f2",
        "_TOGGLE_DECOSPECS",
    ]:
        try:
            del globals()[_n]
        except KeyError:
            pass
    del globals()["_n"]
================================================
FILE: metaflow/metaflow_config_funcs.py
================================================
import json
import os
from collections import namedtuple
from metaflow.exception import MetaflowException
from metaflow.util import is_stringish
# Record stored for every value resolved through from_conf(): the resolved
# value, the callable config_values() uses to serialize it, and whether the
# value is considered to be the default.
ConfigValue = namedtuple("ConfigValue", "value serializer is_default")

# Bit flags for the `include` argument of config_values()
NON_CHANGED_VALUES = 1  # also yield values that are still at their default
NULL_VALUES = 2  # also yield values that are None
ALL_VALUES = 3  # both flags combined
def init_config():
    """
    Load the global Metaflow configuration file.

    Reads $METAFLOW_HOME/config.json or, when METAFLOW_PROFILE is set,
    $METAFLOW_HOME/config_<profile>.json. METAFLOW_HOME defaults to
    ~/.metaflowconfig.

    Returns
    -------
    dict
        The parsed JSON content, or {} when no profile is requested and the
        file does not exist.

    Raises
    ------
    MetaflowException
        If METAFLOW_PROFILE is set but its configuration file is missing.
    """
    home = os.environ.get("METAFLOW_HOME", "~/.metaflowconfig")
    profile = os.environ.get("METAFLOW_PROFILE")
    config_name = "config_%s.json" % profile if profile else "config.json"
    path_to_config = os.path.expanduser(os.path.join(home, config_name))
    if os.path.exists(path_to_config):
        with open(path_to_config, encoding="utf-8") as f:
            return json.load(f)
    if profile:
        # An explicitly requested profile must exist.
        raise MetaflowException(
            "Unable to locate METAFLOW_PROFILE '%s' in '%s'" % (profile, home)
        )
    return {}
def init_local_config():
    """
    Load the per-project configuration file, if there is one.

    Starting at the current working directory and walking up towards the
    filesystem root, look for a DATASTORE_LOCAL_DIR directory; when one is
    found, return the parsed content of LOCAL_CONFIG_FILE inside it.

    Returns
    -------
    dict
        Parsed JSON content, or {} when no directory or no file is found.
    """
    # This function is heavily inspired from LocalStorage.get_datastore_root_from_config
    # but simplifies certain things and also does not depend on DATASTORE_SYSROOT_LOCAL.
    #
    # In other words, since this config is meant to be local to a directory, it does not
    # check in DATASTORE_SYSROOT_LOCAL but only up the current getcwd() path. This also
    # prevents nasty circular dependencies :)
    from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, LOCAL_CONFIG_FILE

    search_root = os.getcwd()
    candidate = os.path.realpath(os.path.join(search_root, DATASTORE_LOCAL_DIR))
    while not os.path.isdir(candidate):
        parent = os.path.dirname(search_root)
        if parent == search_root:
            # Reached the top without finding the directory.
            return {}
        search_root = parent
        candidate = os.path.join(search_root, DATASTORE_LOCAL_DIR)

    # We found a directory to look for the config file in
    config_path = os.path.join(candidate, LOCAL_CONFIG_FILE)
    if not os.path.exists(config_path):
        return {}
    with open(config_path, encoding="utf-8") as f:
        return json.load(f)
# Initialize defaults required to setup environment variables.
# (initialized lazily in from_conf since init_local_config requires
# some configuration values)
METAFLOW_CONFIG = None
METAFLOW_LOCAL_CONFIG = None

# Every value resolved by from_conf, keyed by its METAFLOW_<name> variable name
_all_configs = {}
def config_values(include=0):
    """
    Yield (name, serialized value) for the values resolved through from_conf.

    By default only values that are not None and not the default are yielded:
    in all other cases the code is sufficient to recreate the value (there is
    no external source for it).

    Parameters
    ----------
    include : int, default 0
        Bitwise combination of NULL_VALUES (also yield None values) and
        NON_CHANGED_VALUES (also yield values still at their default).
    """
    for name, entry in _all_configs.items():
        if entry.value is None and not include & NULL_VALUES:
            continue
        if entry.is_default and not include & NON_CHANGED_VALUES:
            continue
        yield name, entry.serializer(entry.value)
def from_conf(name, default=None, validate_fn=None):
    """
    Pull value from the environment or configuration.
    Order is:
    1. Environment (use any environment variable explicitly set by user)
    2. Local config (use any value set in the local config file -- so stuff in
       .metaflow/config.json for example)
    3. Global config (use any value set in the global config file)
    4. Default

    The value is looked up under the name METAFLOW_<name> in all three
    sources. When `default` is not None, the value is converted to the type
    of `default`: lists and dicts are parsed from JSON, booleans are
    evaluated logically ("0", "false" and "" are False), other scalars go
    through the type's constructor.

    Prior to a value being returned, we will validate using validate_fn (if provided).
    Only non-None values are validated.

    validate_fn should accept (name, value).
    If the value validates, return None, else raise a MetaflowException.

    Raises
    ------
    ValueError
        If the value cannot be converted to the type of `default`.
    RuntimeError
        If `default` is of an unsupported type.
    """
    global METAFLOW_CONFIG, METAFLOW_LOCAL_CONFIG

    if METAFLOW_CONFIG is None:
        METAFLOW_CONFIG = init_config()
    if METAFLOW_LOCAL_CONFIG is None:
        METAFLOW_LOCAL_CONFIG = init_local_config()

    is_default = True
    env_name = "METAFLOW_%s" % name
    value = os.environ.get(
        env_name,
        METAFLOW_LOCAL_CONFIG.get(env_name, METAFLOW_CONFIG.get(env_name, default)),
    )
    if validate_fn and value is not None:
        validate_fn(env_name, value)
    if default is not None:
        # Environment variables are always strings, but values read from the
        # JSON configuration files can be of any JSON type.
        if isinstance(default, (list, dict)):
            if not isinstance(value, (list, dict)):
                try:
                    value = json.loads(value)
                except (json.JSONDecodeError, TypeError):
                    raise ValueError(
                        "Expected a valid JSON for %s, got: %s" % (env_name, value)
                    )
                if type(value) != type(default):
                    raise ValueError(
                        "Expected value of type '%s' for %s, got: %s"
                        % (type(default), env_name, value)
                    )
            # A list/dict can be the default itself or come straight from a
            # JSON config file, so compare by value instead of assuming the
            # former.
            is_default = value == default
            _all_configs[env_name] = ConfigValue(
                value=value,
                serializer=json.dumps,
                is_default=is_default,
            )
            return value
        elif isinstance(default, (bool, int, float)) or is_stringish(default):
            try:
                if type(value) != type(default):
                    if isinstance(default, bool):
                        if isinstance(value, str):
                            # Env vars are strings so try to evaluate logically
                            value = value.lower() not in ("0", "false", "")
                        else:
                            # Non-string value from a JSON config file
                            # (e.g. 0 / 1): use its truthiness.
                            value = bool(value)
                    else:
                        value = type(default)(value)
                is_default = value == default
            except (ValueError, TypeError):
                raise ValueError(
                    "Expected a %s for %s, got: %s" % (type(default), env_name, value)
                )
        else:
            raise RuntimeError(
                "Default of type %s for %s is not supported" % (type(default), env_name)
            )
    else:
        is_default = value is None
    _all_configs[env_name] = ConfigValue(
        value=value,
        serializer=str,
        is_default=is_default,
    )
    return value
def get_validate_choice_fn(choices):
    """Build a validate_fn for from_conf() that restricts a value to `choices`.

    The returned function takes (name, value); it returns None when `value`
    is one of `choices` and raises a MetaflowException otherwise.
    """

    def _validate_choice(name, value):
        if value in choices:
            return
        raise MetaflowException(
            "%s must be set to one of %s. Got '%s'." % (name, choices, value)
        )

    return _validate_choice
================================================
FILE: metaflow/metaflow_current.py
================================================
from collections import namedtuple
import os
from typing import Any, Optional, TYPE_CHECKING
from metaflow.metaflow_config import TEMPDIR
# Record type with the fields main_ip, num_nodes, node_index and control_task_id.
Parallel = namedtuple("Parallel", "main_ip num_nodes node_index control_task_id")
if TYPE_CHECKING:
import metaflow
class Current(object):
    """Holder for information about the currently executing flow/run/task.

    All values start out as None (`is_running_flow` starts as False) and are
    filled in by `_set_env`. Note that `graph` and anything passed to
    `_update_env` are installed as properties on the *class*, not on the
    instance.
    """

    def __init__(self):
        self._flow_name = None
        self._run_id = None
        self._step_name = None
        self._task_id = None
        self._retry_count = None
        self._origin_run_id = None
        self._namespace = None
        self._username = None
        self._metadata_str = None
        self._is_running = False
        # Initialise here as well as in `_set_env` so that `tags` does not
        # raise AttributeError before `_set_env` has been called.
        self._tags = None
        self._tempdir = TEMPDIR

        def _raise(ex):
            raise ex

        # Until `_set_env` receives a flow, accessing `graph` raises.
        self.__class__.graph = property(
            fget=lambda self: _raise(RuntimeError("Graph is not available"))
        )

    def _set_env(
        self,
        flow=None,
        run_id=None,
        step_name=None,
        task_id=None,
        retry_count=None,
        origin_run_id=None,
        namespace=None,
        username=None,
        metadata_str=None,
        is_running=True,
        tags=None,
    ):
        """Record the execution context.

        When `flow` is given, its `name` is stored and `graph` is replaced by
        a property returning `flow._graph_info`. Every other argument
        overwrites the corresponding stored value, including with None.
        """
        if flow is not None:
            self._flow_name = flow.name
            self.__class__.graph = property(fget=lambda _, flow=flow: flow._graph_info)
        self._run_id = run_id
        self._step_name = step_name
        self._task_id = task_id
        self._retry_count = retry_count
        self._origin_run_id = origin_run_id
        self._namespace = namespace
        self._username = username
        self._metadata_str = metadata_str
        self._is_running = is_running
        self._tags = tags

    def _update_env(self, env):
        """Expose each key of `env` as a read-only property on the class."""
        for k, v in env.items():
            # `v=v` binds the current value; a plain closure would see only
            # the last value of the loop.
            setattr(self.__class__, k, property(fget=lambda _, v=v: v))

    def __contains__(self, key: str):
        """Return True if attribute `key` exists and is not None."""
        return getattr(self, key, None) is not None

    def get(self, key: str, default=None) -> Optional[Any]:
        """Return attribute `key`, or `default` if it is not available."""
        return getattr(self, key, default)

    @property
    def is_running_flow(self) -> bool:
        """
        Returns True if called inside a running Flow, False otherwise.

        You can use this property e.g. inside a library to choose the desired
        behavior depending on the execution context.

        Returns
        -------
        bool
            True if called inside a run, False otherwise.
        """
        return self._is_running

    @property
    def flow_name(self) -> Optional[str]:
        """
        The name of the currently executing flow.

        Returns
        -------
        str, optional
            Flow name.
        """
        return self._flow_name

    @property
    def run_id(self) -> Optional[str]:
        """
        The run ID of the currently executing run.

        Returns
        -------
        str, optional
            Run ID.
        """
        return self._run_id

    @property
    def step_name(self) -> Optional[str]:
        """
        The name of the currently executing step.

        Returns
        -------
        str, optional
            Step name.
        """
        return self._step_name

    @property
    def task_id(self) -> Optional[str]:
        """
        The task ID of the currently executing task.

        Returns
        -------
        str, optional
            Task ID.
        """
        return self._task_id

    @property
    def retry_count(self) -> int:
        """
        The index of the task execution attempt.

        This property returns 0 for the first attempt to execute the task.
        If the @retry decorator is used and the first attempt fails, this
        property returns the number of times the task was attempted prior
        to the current attempt.

        The stored value is None until `_set_env` has been called.

        Returns
        -------
        int
            The retry count.
        """
        return self._retry_count

    @property
    def origin_run_id(self) -> Optional[str]:
        """
        The run ID of the original run this run was resumed from.

        This property returns None for ordinary runs. If the run
        was started by the resume command, the property returns
        the ID of the original run.

        You can use this property to detect if the run is resumed
        or not.

        Returns
        -------
        str, optional
            Run ID of the original run.
        """
        return self._origin_run_id

    @property
    def pathspec(self) -> Optional[str]:
        """
        Pathspec of the current task, i.e. a unique
        identifier of the current task. The returned
        string follows this format:
        ```
        {flow_name}/{run_id}/{step_name}/{task_id}
        ```

        This is a shorthand to `current.task.pathspec`.

        Returns
        -------
        str, optional
            Pathspec, or None if any of its components is not set.
        """
        pathspec_components = (
            self._flow_name,
            self._run_id,
            self._step_name,
            self._task_id,
        )
        if any(v is None for v in pathspec_components):
            return None
        return "/".join(pathspec_components)

    @property
    def task(self) -> Optional["metaflow.Task"]:
        """
        Task object of the current task.

        Returns
        -------
        Task, optional
            Current task, or None if any pathspec component is not set.
        """
        from metaflow import Task  # Prevent circular dependency

        pathspec_components = (
            self._flow_name,
            self._run_id,
            self._step_name,
            self._task_id,
        )
        if any(v is None for v in pathspec_components):
            return None
        return Task("/".join(pathspec_components), _namespace_check=False)

    @property
    def run(self) -> Optional["metaflow.Run"]:
        """
        Run object of the current run.

        Returns
        -------
        Run, optional
            Current run, or None if the flow name or run ID is not set.
        """
        from metaflow import Run  # Prevent circular dependency

        pathspec_components = (self._flow_name, self._run_id)
        if any(v is None for v in pathspec_components):
            return None
        return Run("/".join(pathspec_components), _namespace_check=False)

    @property
    def namespace(self) -> str:
        """
        The current namespace.

        The stored value is None until `_set_env` has been called.

        Returns
        -------
        str
            Namespace.
        """
        return self._namespace

    @property
    def username(self) -> Optional[str]:
        """
        The name of the user who started the run, if available.

        Returns
        -------
        str, optional
            User name.
        """
        return self._username

    @property
    def tags(self):
        """
        [Legacy function - do not use]

        Access tags through the Run object instead.
        """
        return self._tags

    @property
    def tempdir(self) -> Optional[str]:
        """
        Currently configured temporary directory.

        Returns
        -------
        str, optional
            Temporary directory.
        """
        return self._tempdir
# Instantiate the module-level Current singleton. Its fields are still unset
# at this point; it is populated by task.MetaflowTask before a task is
# executed.
current = Current()
================================================
FILE: metaflow/metaflow_environment.py
================================================
import json
import os
import platform
import sys
from .util import get_username
from . import metaflow_version
from . import metaflow_git
from metaflow.exception import MetaflowException
from metaflow.extension_support import dump_module_info
from metaflow.mflog import BASH_MFLOG, BASH_FLUSH_LOGS
from metaflow.package import MetaflowPackage
from . import R
class InvalidEnvironmentException(MetaflowException):
    """MetaflowException subclass with the headline "Incompatible environment"."""

    headline = "Incompatible environment"
class MetaflowEnvironment(object):
    """Default ("local") environment.

    Most hooks are no-ops here. The concrete logic in this class builds the
    shell commands that download and unpack a code package and collects
    information about the environment.
    """

    # Identifier reported as "environment_type" by get_environment_info().
    TYPE = "local"

    def __init__(self, flow):
        pass

    def init_environment(self, echo):
        """
        Run before any step decorators are initialized.
        """
        pass

    def validate_environment(self, echo, datastore_type):
        """
        Run before any command to validate that we are operating in
        a desired environment.
        """
        pass

    def decospecs(self):
        """
        Environment may insert decorators, equivalent to setting --with
        options on the command line.
        """
        return ()

    def bootstrap_commands(self, step_name, datastore_type):
        """
        A list of shell commands to bootstrap this environment in a remote runtime.
        """
        return []

    def add_to_package(self):
        """
        Called to add custom files needed for this environment. This hook will be
        called in the `MetaflowPackage` class where metaflow compiles the code package
        tarball. This hook can return one of two things (the first is for backwards
        compatibility -- move to the second):
          - a generator yielding a tuple of `(file_path, arcname)` to add files to
            the code package. `file_path` is the path to the file on the local filesystem
            and `arcname` is the path relative to the packaged code.
          - a generator yielding a tuple of `(content, arcname, type)` where:
            - type is one of
            ContentType.{USER_CONTENT, CODE_CONTENT, MODULE_CONTENT, OTHER_CONTENT}
            - for USER_CONTENT:
              - the file will be included relative to the directory containing the
                user's flow file.
              - content: path to the file to include
              - arcname: path relative to the directory containing the user's flow file
            - for CODE_CONTENT:
              - the file will be included relative to the code directory in the package.
                This will be the directory containing `metaflow`.
              - content: path to the file to include
              - arcname: path relative to the code directory in the package
            - for MODULE_CONTENT:
              - the module will be added to the code package as a python module. It will
                be accessible as usual (import <module_name>)
              - content: name of the module
              - arcname: None (ignored)
            - for OTHER_CONTENT:
              - the file will be included relative to any other configuration/metadata
                files for the flow
              - content: path to the file to include
              - arcname: path relative to the config directory in the package
        """
        return []

    def pylint_config(self):
        """
        Environment may override pylint config.
        """
        return []

    @classmethod
    def get_client_info(cls, flow_name, metadata):
        """
        Environment may customize the information returned to the client about the environment

        Parameters
        ----------
        flow_name : str
            Name of the flow
        metadata : dict
            Metadata information regarding the task

        Returns
        -------
        str : Information printed and returned to the user
        """
        return "Local environment"

    def _get_download_code_package_cmd(self, code_package_url, datastore_type):
        """Return a command that downloads the code package from the datastore. We use various
        cloud storage CLI tools because we don't have access to Metaflow codebase (which we
        are about to download in the command).

        The command should download the package to "job.tar" in the current directory.

        It should work silently if everything goes well.

        Supported values of `datastore_type` are "s3", "azure" and "gs"; any
        other value raises NotImplementedError.
        """
        if datastore_type == "s3":
            from .plugins.aws.aws_utils import parse_s3_full_path

            bucket, s3_object = parse_s3_full_path(code_package_url)
            # NOTE: the script quoting is extremely sensitive due to the way shlex.split operates and this being inserted
            #       into a quoted command elsewhere.
            # NOTE: Reason for the extra conditionals in the script are because
            #       Boto3 does not play well with passing None or an empty string to endpoint_url
            return "{python} -c '{script}'".format(
                python=self._python(),
                script='import boto3, os; ep=os.getenv(\\"METAFLOW_S3_ENDPOINT_URL\\"); boto3.client(\\"s3\\", **({\\"endpoint_url\\":ep} if ep else {})).download_file(\\"%s\\", \\"%s\\", \\"job.tar\\")'
                % (bucket, s3_object),
            )
        elif datastore_type == "azure":
            from .plugins.azure.azure_utils import parse_azure_full_path

            container_name, blob = parse_azure_full_path(code_package_url)
            # remove a trailing slash, if present
            # (the `%/` shell parameter expansion is evaluated by the shell
            # that runs the command, not by Python)
            blob_endpoint = "${METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT%/}"
            return "download-azure-blob --blob-endpoint={blob_endpoint} --container={container} --blob={blob} --output-file=job.tar".format(
                blob_endpoint=blob_endpoint,
                blob=blob,
                container=container_name,
            )
        elif datastore_type == "gs":
            from .plugins.gcp.gs_utils import parse_gs_full_path

            bucket_name, gs_object = parse_gs_full_path(code_package_url)
            return (
                "download-gcp-object --bucket=%s --object=%s --output-file=job.tar"
                % (bucket_name, gs_object)
            )
        else:
            raise NotImplementedError(
                "We don't know how to generate a download code package cmd for datastore %s"
                % datastore_type
            )

    def _get_install_dependencies_cmd(self, datastore_type):
        """Return a shell command that pip-installs the packages needed for
        `datastore_type` (plus `requests`).

        The returned command is wrapped in a shell test so that it is skipped
        when METAFLOW_SKIP_INSTALL_DEPENDENCIES is set to a non-empty value.
        Raises NotImplementedError for an unknown `datastore_type`.
        """
        base_cmd = "{} -m pip install -qqq --no-compile --no-cache-dir --disable-pip-version-check".format(
            self._python()
        )

        datastore_packages = {
            "s3": ["boto3"],
            "azure": [
                "azure-identity",
                "azure-storage-blob",
                "azure-keyvault-secrets",
                "simple-azure-blob-downloader",
            ],
            "gs": [
                "google-cloud-storage",
                "google-auth",
                "simple-gcp-object-downloader",
                "google-cloud-secret-manager",
                "packaging",
            ],
        }

        if datastore_type not in datastore_packages:
            raise NotImplementedError(
                "Unknown datastore type: {}".format(datastore_type)
            )

        cmd = "{} {}".format(
            base_cmd, " ".join(datastore_packages[datastore_type] + ["requests"])
        )
        # skip pip installs if we know that packages might already be available
        return "if [ -z $METAFLOW_SKIP_INSTALL_DEPENDENCIES ]; then {}; fi".format(cmd)

    def get_package_commands(
        self, code_package_url, datastore_type, code_package_metadata=None
    ):
        """Return the list of shell commands that set up a task environment:
        define the logging helpers, install dependencies, download the code
        package (with retries), extract it and export environment variables.
        """
        # HACK: We want to keep forward compatibility with compute layers so that
        # they can still call get_package_commands and NOT pass any metadata. If
        # there is no additional information, we *assume* that it is the default
        # used.
        if code_package_metadata is None:
            code_package_metadata = json.dumps(
                {
                    "version": 0,
                    "archive_format": "tgz",
                    "mfcontent_version": 1,
                }
            )
        extra_exports = []
        for k, v in MetaflowPackage.get_post_extract_env_vars(
            code_package_metadata, dest_dir="$(pwd)"
        ).items():
            if k.endswith(":"):
                # If the key ends with a colon, we override the existing value
                # (the trailing colon is stripped from the variable name)
                extra_exports.append("export %s=%s" % (k[:-1], v))
            else:
                # Otherwise the new value is prepended to the existing one,
                # separated by a colon.
                extra_exports.append(
                    "export %s=%s:$(printenv %s)" % (k, v.replace('"', '\\"'), k)
                )

        cmds = (
            [
                BASH_MFLOG,
                BASH_FLUSH_LOGS,
                "mflog 'Setting up task environment.'",
                self._get_install_dependencies_cmd(datastore_type),
                "mkdir metaflow",
                "cd metaflow",
                "mkdir .metaflow",  # mute local datastore creation log
                # Try the download up to 6 times (i = 0..5), sleeping 10
                # seconds after each failed attempt.
                "i=0; while [ $i -le 5 ]; do "
                "mflog 'Downloading code package...'; "
                + self._get_download_code_package_cmd(code_package_url, datastore_type)
                + " && mflog 'Code package downloaded.' && break; "
                "sleep 10; i=$((i+1)); "
                "done",
                # i only exceeds 5 when every attempt above failed.
                "if [ $i -gt 5 ]; then "
                "mflog 'Failed to download code package from %s "
                "after 6 tries. Exiting...' && exit 1; "
                "fi" % code_package_url,
            ]
            + MetaflowPackage.get_extract_commands(
                code_package_metadata, "job.tar", dest_dir="."
            )
            + extra_exports
            + [
                "mflog 'Task is starting.'",
                "flush_mflogs",
            ]
        )
        return cmds

    def get_environment_info(self, include_ext_info=False):
        """Return a dict describing the environment.

        Entries whose value is None or the empty string are dropped. When
        `include_ext_info` is true, the key/value pair returned by
        `dump_module_info()` is added.
        """
        # note that this dict goes into the code package
        # so variables here should be relatively stable (no
        # timestamps) so the hash won't change all the time
        env = {
            "platform": platform.system(),
            "username": get_username(),
            "production_token": os.environ.get("METAFLOW_PRODUCTION_TOKEN"),
            "runtime": os.environ.get("METAFLOW_RUNTIME_NAME", "dev"),
            "app": os.environ.get("APP"),
            "environment_type": self.TYPE,
            "use_r": R.use_r(),
            "python_version": sys.version,
            "python_version_code": "%d.%d.%d" % sys.version_info[:3],
            "metaflow_version": metaflow_version.get_version(),
            "script": os.path.basename(os.path.abspath(sys.argv[0])),
            # Add git info
            **metaflow_git.get_repository_info(
                path=os.path.dirname(os.path.abspath(sys.argv[0]))
            ),
        }
        if R.use_r():
            env["metaflow_r_version"] = R.metaflow_r_version()
            env["r_version"] = R.r_version()
            env["r_version_code"] = R.r_version_code()
        if include_ext_info:
            # Information about extension modules (to load them in the proper order)
            ext_key, ext_val = dump_module_info()
            env[ext_key] = ext_val
        return {k: v for k, v in env.items() if v is not None and v != ""}

    def executable(self, step_name, default=None):
        """Return `default` if it is not None, otherwise the interpreter name
        from `_python()`. `step_name` is not used by this implementation.
        """
        if default is not None:
            return default
        return self._python()

    def _python(self):
        """Return "python3" when `R.use_r()` is true, otherwise "python"."""
        if R.use_r():
            return "python3"
        else:
            return "python"
================================================
FILE: metaflow/metaflow_git.py
================================================
#!/usr/bin/env python
"""Get git repository information for the package
Functions to retrieve git repository details like URL, branch name,
and commit SHA for Metaflow code provenance tracking.
"""
import os
import subprocess
from typing import Dict, List, Optional, Tuple, Union
# Cache for git information to avoid repeated subprocess calls
_git_info_cache = None
__all__ = ("get_repository_info",)
def _call_git(
args: List[str], path=Union[str, os.PathLike]
) -> Tuple[Optional[str], Optional[int], bool]:
"""
Call git with provided args.
Returns
-------
tuple : Tuple containing
(stdout, exitcode, failure) of the call
"""
try:
result = subprocess.run(
["git", *args],
cwd=path,
capture_output=True,
text=True,
check=False,
)
return result.stdout.strip(), result.returncode, False
except (OSError, subprocess.SubprocessError):
# Covers subprocess timeouts and other errors which would not lead to an exit code
return None, None, True
def _get_repo_url(path: Union[str, os.PathLike]) -> Optional[str]:
"""Get the repository URL from git config"""
stdout, returncode, _failed = _call_git(
["config", "--get", "remote.origin.url"], path
)
if returncode == 0:
url = stdout
# Convert SSH URLs to HTTPS for clickable links
if url.startswith("git@"):
parts = url.split(":", 1)
if len(parts) == 2:
domain = parts[0].replace("git@", "")
repo_path = parts[1]
url = f"https://{domain}/{repo_path}"
return url
return None
def _get_branch_name(path: Union[str, os.PathLike]) -> Optional[str]:
"""Get the current git branch name"""
stdout, returncode, _failed = _call_git(["rev-parse", "--abbrev-ref", "HEAD"], path)
return stdout if returncode == 0 else None
def _get_commit_sha(path: Union[str, os.PathLike]) -> Optional[str]:
"""Get the current git commit SHA"""
stdout, returncode, _failed = _call_git(["rev-parse", "HEAD"], path)
return stdout if returncode == 0 else None
def _is_in_git_repo(path: Union[str, os.PathLike]) -> bool:
"""Check if we're currently in a git repository"""
stdout, returncode, _failed = _call_git(
["rev-parse", "--is-inside-work-tree"], path
)
return returncode == 0 and stdout == "true"
def _has_uncommitted_changes(path: Union[str, os.PathLike]) -> Optional[bool]:
"""Check if the git repository has uncommitted changes"""
_stdout, returncode, failed = _call_git(
["diff-index", "--quiet", "HEAD", "--"], path
)
if failed:
return None
return returncode != 0
def get_repository_info(path: Union[str, os.PathLike]) -> Dict[str, Union[str, bool]]:
    """Get git repository information for a path

    The result is computed on the first call and cached at module level;
    later calls return the cached dictionary regardless of `path`.

    Returns:
        dict: Empty when `path` is not inside a git work tree, otherwise a
            dictionary containing:
            repo_url: Repository URL (converted to HTTPS if from SSH)
            branch_name: Current branch name
            commit_sha: Current commit SHA
            has_uncommitted_changes: Boolean indicating if there are uncommitted changes
    """
    global _git_info_cache

    if _git_info_cache is None:
        # Mark the cache as populated before querying git.
        _git_info_cache = {}
        if _is_in_git_repo(path):
            _git_info_cache = dict(
                repo_url=_get_repo_url(path),
                branch_name=_get_branch_name(path),
                commit_sha=_get_commit_sha(path),
                has_uncommitted_changes=_has_uncommitted_changes(path),
            )
    return _git_info_cache
================================================
FILE: metaflow/metaflow_profile.py
================================================
import time
from contextlib import contextmanager
from .metaflow_config import PROFILE_FROM_START
# Reference timestamp used by from_start(); set lazily on its first call.
init_time = None

if PROFILE_FROM_START:

    def from_start(msg: str):
        """Print how many milliseconds have elapsed since the first call to
        this function, labelled with `msg`.

        The reference time is taken on the first call, so that call reports
        an elapsed time close to zero.
        """
        global init_time
        if init_time is None:
            init_time = time.time()
        print("From start: %s took %dms" % (msg, int((time.time() - init_time) * 1000)))

else:

    def from_start(_msg: str):
        """No-op variant defined when PROFILE_FROM_START is falsy."""
        pass
@contextmanager
def profile(label, stats_dict=None):
    """Context manager measuring the wall-clock time of its body.

    Parameters
    ----------
    label : str
        Name of the measured section.
    stats_dict : dict, optional
        When given, the elapsed milliseconds are added to
        `stats_dict[label]` and nothing is printed. When None, start and
        completion messages are printed instead.

    If the body raises, no timing is recorded or printed.
    """
    verbose = stats_dict is None
    if verbose:
        print("PROFILE: %s starting" % label)
    began = time.time()
    yield
    elapsed_ms = int((time.time() - began) * 1000)
    if verbose:
        print("PROFILE: %s completed in %dms" % (label, elapsed_ms))
        return
    stats_dict[label] = stats_dict.get(label, 0) + elapsed_ms
================================================
FILE: metaflow/metaflow_version.py
================================================
#!/usr/bin/env python
"""Get version identification for the package
See the documentation of get_version for more information
"""
# This file is adapted from https://github.com/aebrahim/python-git-version
import subprocess
from os import path, name, environ, listdir
from metaflow.extension_support import update_package_info
from metaflow.meta_files import read_info_file
# True/False correspond to the value `public`` in get_version
_version_cache = {True: None, False: None}
__all__ = ("get_version",)
GIT_COMMAND = "git"
# On Windows (os.name == "nt") git may not be on the PATH, so look for it in
# a few well-known install locations and override GIT_COMMAND accordingly.
if name == "nt":

    def find_git_on_windows():
        """find the path to the git executable on Windows

        Returns "git" when `where /Q git` succeeds, otherwise the first
        existing candidate path, otherwise "git" as a last resort.
        """
        # first see if git is in the path
        try:
            subprocess.check_output(["where", "/Q", "git"])
            # if this command succeeded, git is in the path
            return "git"
        # catch the exception thrown if git was not found
        except subprocess.CalledProcessError:
            pass
        # There are several locations where git.exe may be hiding
        possible_locations = []
        # look in program files for msysgit
        if "PROGRAMFILES(X86)" in environ:
            possible_locations.append(
                "%s/Git/cmd/git.exe" % environ["PROGRAMFILES(X86)"]
            )
        if "PROGRAMFILES" in environ:
            possible_locations.append("%s/Git/cmd/git.exe" % environ["PROGRAMFILES"])
        # look for the GitHub version of git
        if "LOCALAPPDATA" in environ:
            github_dir = "%s/GitHub" % environ["LOCALAPPDATA"]
            if path.isdir(github_dir):
                for subdir in listdir(github_dir):
                    if not subdir.startswith("PortableGit"):
                        continue
                    possible_locations.append(
                        "%s/%s/bin/git.exe" % (github_dir, subdir)
                    )
        # Candidates are checked in the order they were added above.
        for possible_location in possible_locations:
            if path.isfile(possible_location):
                return possible_location
        # git was not found
        return "git"

    GIT_COMMAND = find_git_on_windows()
def call_git_describe(file_to_check, abbrev=7):
    """Return the stripped output of `git describe --tags --dirty --long`
    run in the directory containing `file_to_check`.

    Returns None when `file_to_check` is not tracked by git there, or when a
    git invocation fails.
    """
    try:
        wd, filename = path.split(file_to_check)

        # First check if the file is tracked in the GIT repository we are in
        # We do this because in some setups and for some bizarre reason, python files
        # are installed directly into a git repository (I am looking at you brew). We
        # don't want to consider this a GIT install in that case.
        tracked = subprocess.run(
            [GIT_COMMAND, "ls-files", "--error-unmatch", filename],
            cwd=wd,
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            check=False,
        )
        if tracked.returncode != 0:
            return None

        describe_cmd = [
            GIT_COMMAND,
            "describe",
            "--tags",
            "--dirty",
            "--long",
            "--abbrev=%d" % abbrev,
        ]
        raw = subprocess.check_output(
            describe_cmd, cwd=wd, stderr=subprocess.DEVNULL
        )
        return raw.decode("ascii").strip()
    except (OSError, subprocess.CalledProcessError):
        return None
def format_git_describe(git_str, public=False):
    """format the result of calling 'git describe' as a python version

    Parameters
    ----------
    git_str : str or None
        Output of `git describe --tags --dirty --long`, i.e.
        `<tag>-<post>-g<hash>` optionally followed by `-dirty`. The tag itself
        may contain dashes.
    public : bool, default False
        When True, omit the hash and the dirty marker.

    Returns
    -------
    str or None
        The formatted version, or None when `git_str` is None.

    Raises
    ------
    ValueError
        If `git_str` does not have at least three dash-separated fields.
    """
    if git_str is None:
        return None

    dirty_suffix = "-dirty"
    if git_str.endswith(dirty_suffix):
        # Formatted as <tag>-<post>-<hash>-dirty
        dirty = dirty_suffix
        git_str = git_str[: -len(dirty_suffix)]
    else:
        # Formatted as <tag>-<post>-<hash>
        dirty = ""

    # Split from the right: the last two fields are always <post> and <hash>,
    # so any dashes inside the tag are preserved.
    tag, post, h = git_str.rsplit("-", 2)

    if post == "0":
        if public:
            return tag
        return tag + dirty

    if public:
        return "%s.post%s" % (tag, post)

    # h[1:] drops the leading "g" that git puts in front of the hash.
    return "%s.post%s-git%s%s" % (tag, post, h[1:], dirty)
def read_info_version():
    """Return the "metaflow_version" entry of the INFO file.

    Returns None when `read_info_file()` yields nothing or the entry is
    missing.
    """
    info = read_info_file()
    return info.get("metaflow_version") if info else None
def make_public_version(version_string):
    """
    Reduce a complex version string to its public part.

    Everything from the first "+" onward (local version identifier) is
    dropped, then everything from the first remaining "-" onward
    (development markers), giving a public, PEP 440-style version.
    """
    without_local, _, _ = version_string.partition("+")
    public_version, _, _ = without_local.partition("-")
    return public_version
def get_version(public=False):
    """Tracks the version number.

    We first check the INFO file to see if we recorded a version of Metaflow. If there
    is none, we check if we are in a GIT repository and if so, form the version
    from that.

    Otherwise, we return the version of Metaflow that was installed.

    Parameters
    ----------
    public : bool, default False
        When True, this function returns a *public* version specification which
        doesn't include any local information (dirtiness or hash). See
        https://packaging.python.org/en/latest/specifications/version-specifiers/#version-scheme

    Returns
    -------
    str
        The version string. Results are cached separately for
        `public=True` and `public=False`.
    """

    global _version_cache

    # To get the version we do the following:
    #  1. Check if we have a cached version. If so, return that
    #  2. Then check if we have an INFO file present. If so, use that as it is
    #     the most reliable way to get the version. In particular, when running remotely,
    #     metaflow is installed in a directory and if any extension is using distutils to
    #     determine its version, this would return None and querying the version directly
    #     from the extension would fail to produce the correct result
    #  3. Otherwise, if we are in a GIT repository, use the git describe output
    #  4. If git gives nothing either, fall back to the version information that
    #     is populated by metaflow and the extensions.
    if _version_cache[public] is not None:
        return _version_cache[public]

    version = (
        read_info_version()
    )  # Version info is cached in INFO file; includes extension info

    if version:
        # If we have a version from the INFO file, use it directly.
        # However, if we are asked for a public version, we parse it to make sure
        # that no local information is included.
        if public:
            version = make_public_version(version)
        _version_cache[public] = version
        return version

    # Get the version for Metaflow, favor the GIT version
    import metaflow

    version = format_git_describe(
        call_git_describe(file_to_check=metaflow.__file__), public=public
    )
    if version is None:
        version = metaflow.__version__

    # Look for extensions and compute their versions. Properly formed extensions have
    # a toplevel file which will contain a __mf_extensions__ value and a __version__
    # value. We already saved the properly formed modules when loading metaflow in
    # __ext_tl_modules__.
    ext_versions = []
    for pkg_name, extension_module in metaflow.__ext_tl_modules__:
        ext_name = getattr(extension_module, "__mf_extensions__", "")
        ext_version = format_git_describe(
            call_git_describe(file_to_check=extension_module.__file__), public=public
        )
        if ext_version is None:
            ext_version = getattr(extension_module, "__version__", "")
        # Update the package information about reported version for the extension
        # (only for the full info which is called at least once -- if we update more
        # it will error out since we can only update_package_info once)
        if not public:
            update_package_info(
                package_name=pkg_name,
                extension_name=ext_name,
                package_version=ext_version,
            )
        ext_versions.append("%s(%s)" % (ext_name, ext_version))

    # We now have all the information about extensions so we can form the final string
    # of the form "<version>+<ext>(<ext_version>);<ext>(<ext_version>)..."
    if ext_versions:
        version = version + "+" + ";".join(ext_versions)
    _version_cache[public] = version
    return version
================================================
FILE: metaflow/mflog/__init__.py
================================================
import math
import time
from .mflog import refine, set_should_persist
from metaflow.util import to_unicode
from metaflow.exception import MetaflowInternalError
# Log source indicates the system that *minted the timestamp*
# for the logline. This means that for a single task we can
# assume that timestamps originating from the same source are
# monotonically increasing. Clocks are not synchronized between
# log sources, so if a file contains multiple log sources, the
# lines may not be in the ascending timestamp order.
# Note that a logfile prefixed with a log source, e.g. runtime,
# may contain lines from multiple sources below it (e.g. task).
#
# Note that these file names don't match to any previous log files
# (e.g. `0.stdout.log`). Older Metaflow versions will return None
# or an empty string when trying to access these new-style files.
# This is deliberate, so the users won't see partial files with older
# clients.
RUNTIME_LOG_SOURCE = "runtime"
TASK_LOG_SOURCE = "task"
# Loglines from all sources need to be merged together to
# produce a complete view of logs. Hence, keep this list short
# since each item takes a DataStore access.
LOG_SOURCES = [RUNTIME_LOG_SOURCE, TASK_LOG_SOURCE]
# BASH_MFLOG defines a bash function that outputs valid mflog
# structured loglines. We use this to output properly timestamped
# loglines before the Metaflow package has been downloaded.
# The function appends the structured line to the file named by
# $MFLOG_STDOUT and also echoes the plain message.
# Note that MFLOG_STDOUT is defined by the export_mflog_env_vars() function.
BASH_MFLOG = (
    "mflog(){ "
    "T=$(date -u -Ins|tr , .); "
    'echo \\"[MFLOG|0|${T:0:26}Z|%s|$T]$1\\"'
    " >> $MFLOG_STDOUT; echo $1; "
    " }" % TASK_LOG_SOURCE
)

# Command (as an argument list and as a single string) that runs the
# metaflow.mflog.save_logs module.
BASH_SAVE_LOGS_ARGS = ["python", "-m", "metaflow.mflog.save_logs"]
BASH_SAVE_LOGS = " ".join(BASH_SAVE_LOGS_ARGS)
# BASH_FLUSH_LOGS defines a bash function `flush_mflogs` that runs
# BASH_SAVE_LOGS.
BASH_FLUSH_LOGS = "flush_mflogs(){ " f"{BASH_SAVE_LOGS}; " "}"
def bash_capture_logs(bash_expr, var_transform=None):
    """Return a bash expression that runs `bash_expr` with its stdout and
    stderr redirected to `metaflow.mflog.tee` processes.

    Parameters
    ----------
    bash_expr : str
        The bash expression to wrap.
    var_transform : callable, optional
        Maps an environment variable name (`MFLOG_STDOUT` / `MFLOG_STDERR`)
        to the text inserted into the command. Defaults to prefixing the
        name with `$`.
    """
    if var_transform is None:

        def var_transform(s):
            return "$%s" % s

    tee_template = "python -m metaflow.mflog.tee %s %s"
    tee_stdout = tee_template % (TASK_LOG_SOURCE, var_transform("MFLOG_STDOUT"))
    tee_stderr = tee_template % (TASK_LOG_SOURCE, var_transform("MFLOG_STDERR"))
    return "(%s) 1>> >(%s) 2>> >(%s >&2)" % (bash_expr, tee_stdout, tee_stderr)
# update_delay determines how often logs should be uploaded
# as a function of the task execution time
MIN_UPDATE_DELAY = 0.25  # the most frequent update interval
MAX_UPDATE_DELAY = 30.0  # the least frequent update interval


def update_delay(secs_since_start):
    """Return the delay, in seconds, between log updates.

    The delay grows along a sigmoid of the elapsed time, from about
    MIN_UPDATE_DELAY up to MIN_UPDATE_DELAY + MAX_UPDATE_DELAY. The sigmoid
    reaches roughly
      - 0.1 after 11 minutes
      - 0.5 after 15 minutes
      - 1.0 after 23 minutes
    so updates are very frequent during the first 10 minutes.
    """
    exponent = 9.0 - 0.01 * secs_since_start
    sigmoid = 1.0 / (1.0 + math.exp(exponent))
    return MIN_UPDATE_DELAY + sigmoid * MAX_UPDATE_DELAY
def export_mflog_env_vars(
    flow_name=None,
    run_id=None,
    step_name=None,
    task_id=None,
    retry_count=None,
    datastore_type=None,
    datastore_root=None,
    stdout_path=None,
    stderr_path=None,
):
    """Return a bash `export` expression setting the environment variables
    used by 'tee' and 'save_logs'.

    The variables cannot be set statically because some of them may need to
    be evaluated at runtime. `MF_DATASTORE_ROOT` is only included when
    `datastore_root` is not None. Values are inserted as-is, without shell
    quoting.
    """
    pathspec = "/".join((flow_name, str(run_id), step_name, str(task_id)))
    assignments = [
        ("PYTHONUNBUFFERED", "x"),
        ("MF_PATHSPEC", pathspec),
        ("MF_DATASTORE", datastore_type),
        ("MF_ATTEMPT", retry_count),
        ("MFLOG_STDOUT", stdout_path),
        ("MFLOG_STDERR", stderr_path),
    ]
    if datastore_root is not None:
        assignments.append(("MF_DATASTORE_ROOT", datastore_root))
    rendered = ["%s=%s" % (key, value) for key, value in assignments]
    return "export " + " ".join(rendered)
def tail_logs(prefix, stdout_tail, stderr_tail, echo, has_log_updates):
    """Echo lines from `stdout_tail` and `stderr_tail` while
    `has_log_updates()` returns true, then flush both once more.

    Each line is passed through `refine` with `prefix` before being echoed.
    Errors raised while reading a tail are reported through `echo` on
    "stderr" and do not stop the loop.
    """

    def _available_logs(tail, stream, echo, should_persist=False):
        try:
            for raw in tail:
                if should_persist:
                    shown = set_should_persist(raw)
                else:
                    shown = refine(raw, prefix=prefix)
                text = shown.strip().decode("utf-8", errors="replace")
                echo(text, stream, no_bold=True)
        except Exception as ex:
            echo(
                "%s[ temporary error in fetching logs: %s ]" % (to_unicode(prefix), ex),
                "stderr",
            )

    def _drain_both():
        _available_logs(stdout_tail, "stdout", echo)
        _available_logs(stderr_tail, "stderr", echo)

    started_at = time.time()
    next_flush_at = started_at
    delay = update_delay(0)
    while has_log_updates():
        if time.time() > next_flush_at:
            _drain_both()
            now = time.time()
            delay = update_delay(now - started_at)
            next_flush_at = now + delay

        # This sleep should never delay log updates. On the other hand,
        # we should exit this loop when the task has finished without
        # a long delay, regardless of the log tailing schedule
        time.sleep(min(delay, 5.0))
    # It is possible that we exit the loop above before all logs have been
    # tailed.
    _drain_both()
def get_log_tailer(log_url, datastore_type):
    """Return a log tailer for `log_url` matching `datastore_type`.

    Supported datastore types are "s3", "azure" and "gs"; the tailer class is
    imported only for the requested type. Any other type raises
    MetaflowInternalError.
    """
    if datastore_type == "s3":
        from metaflow.plugins.datatools.s3.s3tail import S3Tail as tailer_cls
    elif datastore_type == "azure":
        from metaflow.plugins.azure.azure_tail import AzureTail as tailer_cls
    elif datastore_type == "gs":
        from metaflow.plugins.gcp.gs_tail import GSTail as tailer_cls
    else:
        raise MetaflowInternalError(
            "Log tailing implementation missing for datastore type %s"
            % (datastore_type,)
        )
    return tailer_cls(log_url)
================================================
FILE: metaflow/mflog/mflog.py
================================================
import heapq
import re
import time
import uuid
from datetime import datetime
from collections import namedtuple
from metaflow.util import to_bytes, to_fileobj, to_unicode
# Version marker written into every structured logline by decorate().
VERSION = b"0"

# Pattern of a structured logline. The capture groups are, in order:
#   1. optional "[!" should-persist marker
#   2. version (only "0" is accepted)
#   3. UTC timestamp string (without the trailing "Z")
#   4. log source
#   5. line id
#   6. the message
RE = rb"(\[!)?" rb"\[MFLOG\|" rb"(0)\|" rb"(.+?)Z\|" rb"(.+?)\|" rb"(.+?)\]" rb"(.*)"

# the RE groups defined above must match the MFLogline fields below
# except utc_timestamp, which is filled in by the parser based on utc_tstamp_str
MFLogline = namedtuple(
    "MFLogline",
    [
        "should_persist",
        "version",
        "utc_tstamp_str",
        "logsource",
        "id",
        "msg",
        "utc_tstamp",
    ],
)

LINE_PARSER = re.compile(RE)

# strftime/strptime format of the timestamp embedded in loglines.
ISOFORMAT = "%Y-%m-%dT%H:%M:%S.%f"

# Placeholder timestamp given to lines that cannot be parsed (see
# merge_logs); chosen to be later than any valid timestamp.
MISSING_TIMESTAMP = datetime(3000, 1, 1)
MISSING_TIMESTAMP_STR = MISSING_TIMESTAMP.strftime(ISOFORMAT)
# utc_to_local() is based on https://stackoverflow.com/a/13287083
# NOTE: it might not work correctly for historical timestamps, e.g.
# if timezone definitions have changed. It should be ok for recently
# generated timestamps.
#
# Exactly one of the three definitions below is selected at import time.
if time.timezone == 0:
    # the local timezone is UTC (common on servers). Don't waste time
    # on conversions
    utc_to_local = lambda x: x
else:
    try:
        # python3
        from datetime import timezone

        def utc_to_local(utc_dt):
            """Interpret the naive `utc_dt` as UTC and convert it to an
            aware datetime in the local timezone."""
            return utc_dt.replace(tzinfo=timezone.utc).astimezone(tz=None)

    except ImportError:
        # python2
        import calendar

        def utc_to_local(utc_dt):
            """Convert the naive UTC `utc_dt` to a naive local datetime,
            keeping its microseconds."""
            timestamp = calendar.timegm(utc_dt.timetuple())
            local_dt = datetime.fromtimestamp(timestamp)
            # timetuple() has whole-second resolution, so restore the
            # microseconds explicitly.
            return local_dt.replace(microsecond=utc_dt.microsecond)
def decorate(source, line, version=VERSION, now=None, lineid=None):
    """Wrap `line` into a structured mflog logline.

    Parameters
    ----------
    source : str or bytes
        Log source that minted the timestamp.
    line : str or bytes
        The message.
    version : str or bytes, default VERSION
        Format version marker.
    now : datetime, optional
        Timestamp to embed; defaults to the current UTC time.
    lineid : str or bytes, optional
        Identifier of the line; a random UUID is generated when falsy.

    Returns
    -------
    bytes
        `[MFLOG|<version>|<timestamp>Z|<source>|<lineid>]<line>`
    """
    if now is None:
        now = datetime.utcnow()
    tstamp = to_bytes(now.strftime(ISOFORMAT))
    if not lineid:
        lineid = str(uuid.uuid4())
    # Normalise every caller-supplied piece to bytes: b"".join() below would
    # otherwise raise TypeError for a `lineid` or `version` given as str.
    lineid = to_bytes(lineid)
    version = to_bytes(version)
    line = to_bytes(line)
    source = to_bytes(source)
    return b"".join(
        (b"[MFLOG|", version, b"|", tstamp, b"Z|", source, b"|", lineid, b"]", line)
    )
def is_structured(line):
    """Return True if `line` begins with an MFLOG header, with or without the "[!" marker."""
    return to_bytes(line).startswith((b"[MFLOG|", b"[![MFLOG|"))
def parse(line):
    """
    Parse one structured log line.

    Returns
    -------
    MFLogline or None
        The parsed fields, with `utc_tstamp` derived from the timestamp string.
        None when the line does not match the MFLOG pattern or when its
        timestamp cannot be decoded/parsed.
    """
    m = LINE_PARSER.match(to_bytes(line))
    if m is None:
        return None
    try:
        fields = list(m.groups())
        fields.append(datetime.strptime(to_unicode(fields[2]), ISOFORMAT))
        return MFLogline(*fields)
    except Exception:
        # A malformed line is reported as "not parseable" rather than raised,
        # but interrupts and interpreter exit must not be swallowed here.
        return None
def set_should_persist(line):
    """
    Prefix a structured line with the "[!" marker, which tells the receiver
    that the line should be persisted. Lines that are unstructured or already
    marked are returned unchanged (as bytes).
    """
    raw = to_bytes(line)
    # Structured and not yet marked means the line starts with the bare header.
    if raw.startswith(b"[MFLOG|"):
        return b"[!" + raw
    return raw
def unset_should_persist(line):
    """
    Strip the "[!" marker from a structured line; this should be done before
    the line is persisted. Any other line is returned unchanged (as bytes).
    """
    raw = to_bytes(line)
    # Structured and marked means the line starts with the marked header.
    if raw.startswith(b"[![MFLOG|"):
        return raw[2:]
    return raw
def refine(line, prefix=None, suffix=None):
    """
    Insert `prefix` right after the first "]" in `line` and append `suffix` at
    the very end. A line without any "]" is returned unchanged (as bytes).
    """
    raw = to_bytes(line)
    lead = to_bytes(prefix) if prefix else b""
    tail = to_bytes(suffix) if suffix else b""
    header, bracket, body = raw.partition(b"]")
    if not bracket:
        return raw
    return header + b"]" + lead + body + tail
def merge_logs(logs):
    """
    Merge several log blobs into a single stream ordered by timestamp.

    Parameters
    ----------
    logs : iterable
        Log blobs; each is turned into a line iterator with `to_fileobj`.

    Yields
    ------
    MFLogline
        Parsed lines in ascending timestamp order. Lines that cannot be parsed
        are emitted with MISSING_TIMESTAMP, i.e. after the valid lines of
        their blob, in their original order.
    """

    def line_iter(logblob):
        # all valid timestamps are guaranteed to be smaller than
        # MISSING_TIMESTAMP, hence this iterator maintains the
        # ascending order even when corrupt loglines are present
        missing = []
        for line in to_fileobj(logblob):
            res = parse(line)
            if res:
                yield res.utc_tstamp_str, res
            else:
                missing.append(line)
        missing_tstamp = MISSING_TIMESTAMP_STR.encode("utf-8")
        for line in missing:
            res = MFLogline(
                False,
                None,
                missing_tstamp,
                None,
                None,
                line,
                MISSING_TIMESTAMP,
            )
            yield res.utc_tstamp_str, res

    def by_timestamp(pair):
        return pair[0]

    # Order by the timestamp only. Comparing the whole (timestamp, MFLogline)
    # pair would fall through to the remaining fields on a tie, which reorders
    # lines by content/id and can raise TypeError when None meets bytes.
    # sorted() and heapq.merge() are both stable, so ties keep source order.
    #
    # note that sorted() below should be very cheap, often an O(n) operation,
    # because Python's Timsort is very fast for already sorted data.
    per_blob = [sorted(line_iter(blob), key=by_timestamp) for blob in logs]
    for _, line in heapq.merge(*per_blob, key=by_timestamp):
        yield line
================================================
FILE: metaflow/mflog/save_logs.py
================================================
import os
# This script is used to upload logs during task bootstrapping, so
# it shouldn't have external dependencies besides Metaflow itself
# (e.g. no click for parsing CLI args).
from metaflow.datastore import FlowDataStore
from metaflow.plugins import DATASTORES
from metaflow.util import Path
from . import TASK_LOG_SOURCE
from metaflow.tracing import cli
SMALL_FILE_LIMIT = 1024 * 1024
@cli("save_logs")
def save_logs():
    """
    Upload the task's stdout/stderr log files to the task datastore.

    All inputs come from environment variables: MF_PATHSPEC, MF_ATTEMPT,
    MF_DATASTORE, the optional MF_DATASTORE_ROOT, MFLOG_STDOUT and
    MFLOG_STDERR. Upload failures are deliberately not fatal.
    """

    def _read_file(path):
        with open(path, "rb") as f:
            return f.read()

    # these env vars are set by mflog.mflog_env
    pathspec = os.environ["MF_PATHSPEC"]
    attempt = os.environ["MF_ATTEMPT"]
    ds_type = os.environ["MF_DATASTORE"]
    ds_root = os.environ.get("MF_DATASTORE_ROOT")
    paths = (os.environ["MFLOG_STDOUT"], os.environ["MFLOG_STDERR"])

    flow_name, run_id, step_name, task_id = pathspec.split("/")
    storage_impl = [d for d in DATASTORES if d.TYPE == ds_type][0]
    if ds_root is None:

        def print_clean(line, **kwargs):
            pass

        ds_root = storage_impl.get_datastore_root_from_config(print_clean)
    flow_datastore = FlowDataStore(
        flow_name, None, storage_impl=storage_impl, ds_root=ds_root
    )
    task_datastore = flow_datastore.get_task_datastore(
        run_id, step_name, task_id, int(attempt), mode="w"
    )

    try:
        streams = ("stdout", "stderr")
        sizes = [
            (stream, path, os.path.getsize(path))
            for stream, path in zip(streams, paths)
            if os.path.exists(path)
        ]
        if not sizes:
            # Neither log file exists yet, so there is nothing to upload.
            # (Previously this case relied on max() raising ValueError and
            # the error being swallowed below.)
            return

        # Small logs are read into memory; larger ones are handed over as Path
        # objects instead of bytes.
        if max(size for _, _, size in sizes) < SMALL_FILE_LIMIT:
            op = _read_file
        else:
            op = Path

        data = {stream: op(path) for stream, path, _ in sizes}
        task_datastore.save_logs(TASK_LOG_SOURCE, data)
    except Exception:
        # Upload failing is not considered a fatal error.
        # This script shouldn't return non-zero exit codes
        # for transient errors.
        pass
if __name__ == "__main__":
    # Script entry point: perform a single log upload.
    save_logs()
    # to debug delays in logs, comment the line above and uncomment
    # this snippet:
    """
import sys
from metaflow.metaflow_profile import profile
d = {}
with profile('save_logs', stats_dict=d):
save_logs()
print('Save logs took %dms' % d['save_logs'], file=sys.stderr)
"""
================================================
FILE: metaflow/mflog/save_logs_periodically.py
================================================
import os
import sys
import time
import subprocess
from threading import Thread
from metaflow.sidecar import MessageTypes
from . import update_delay, BASH_SAVE_LOGS_ARGS
class SaveLogsPeriodicallySidecar(object):
    """
    Sidecar worker that re-runs the log upload command whenever the size of
    the task's stdout/stderr log files changes.
    """

    def __init__(self):
        self._thread = Thread(target=self._update_loop)
        # Must be set before the thread starts: the loop polls this flag.
        self.is_alive = True
        self._thread.start()

    def process_message(self, msg):
        """Stop the polling loop when a SHUTDOWN message arrives."""
        if msg.msg_type == MessageTypes.SHUTDOWN:
            self.is_alive = False

    @classmethod
    def get_worker(cls):
        return cls

    def _update_loop(self):
        def _file_size(path):
            # Ask for the size directly rather than checking existence first:
            # the file may vanish between the two calls, and an uncaught
            # OSError here would silently end the polling thread.
            try:
                return os.path.getsize(path)
            except OSError:
                return 0

        # these env vars are set by mflog.mflog_env
        FILES = [os.environ["MFLOG_STDOUT"], os.environ["MFLOG_STDERR"]]
        start_time = time.time()
        sizes = [0 for _ in FILES]
        while self.is_alive:
            new_sizes = list(map(_file_size, FILES))
            if new_sizes != sizes:
                sizes = new_sizes
                try:
                    subprocess.call(BASH_SAVE_LOGS_ARGS)
                except Exception:
                    # Best effort: a failed upload is retried on the next
                    # size change.
                    pass
            time.sleep(update_delay(time.time() - start_time))
================================================
FILE: metaflow/mflog/tee.py
================================================
import sys
from .mflog import decorate
# This script is similar to the command-line utility 'tee':
# It reads stdin line by line and writes the lines to stdout
# and a file. In contrast to 'tee', this script formats each
# line with mflog-style structure.
if __name__ == "__main__":
    # argv[1] is the log source name, argv[2] the file that receives the
    # decorated copy of every line.
    SOURCE = sys.argv[1].encode("ascii")
    with open(sys.argv[2], mode="ab", buffering=0) as f:
        if sys.version_info < (3, 0):
            # Python 2
            # https://bugs.python.org/issue3907
            incoming = iter(sys.stdin.readline, "")
            passthrough = sys.stdout
        else:
            # Python 3
            incoming = sys.stdin.buffer
            passthrough = sys.stdout.buffer
        for line in incoming:
            f.write(decorate(SOURCE, line))
            passthrough.write(line)
================================================
FILE: metaflow/monitor.py
================================================
import time
from contextlib import contextmanager
from metaflow.sidecar import Message, MessageTypes, Sidecar
# Type tags stored under "_type" in a serialized metric and used to pick the
# class that deserializes it.
COUNTER_TYPE = "COUNTER"
GAUGE_TYPE = "GAUGE"
TIMER_TYPE = "TIMER"
class NullMonitor(object):
    """
    Monitor that forwards counters, timers and gauges to a sidecar as
    best-effort messages. Nothing is measured while the sidecar is inactive.
    """

    TYPE = "nullSidecarMonitor"

    def __init__(self, *args, **kwargs):
        # Currently passed flow and env as kwargs
        self._sidecar = Sidecar(self.TYPE)

    def start(self):
        return self._sidecar.start()

    def terminate(self):
        return self._sidecar.terminate()

    def send(self, msg):
        # Arbitrary message sending. Useful if you want to override some different
        # types of messages.
        self._sidecar.send(msg)

    @contextmanager
    def count(self, name):
        """Context manager that reports one increment of counter `name` on normal exit."""
        if not self._sidecar.is_active:
            yield
            return
        tally = Counter(name)
        tally.increment()
        # The message is built before the body runs and sent after it.
        msg = Message(MessageTypes.BEST_EFFORT, {"counter": tally.serialize()})
        yield
        self._sidecar.send(msg)

    @contextmanager
    def measure(self, name):
        """Context manager that reports `<name>_timer` and `<name>_counter` on normal exit."""
        if not self._sidecar.is_active:
            yield
            return
        stopwatch = Timer(name + "_timer")
        tally = Counter(name + "_counter")
        stopwatch.start()
        tally.increment()
        yield
        stopwatch.end()
        payload = {"counter": tally.serialize(), "timer": stopwatch.serialize()}
        self._sidecar.send(Message(MessageTypes.BEST_EFFORT, payload))

    def gauge(self, gauge):
        """Send the current state of `gauge` if the sidecar is active."""
        if not self._sidecar.is_active:
            return
        payload = {"gauge": gauge.serialize()}
        self._sidecar.send(Message(MessageTypes.BEST_EFFORT, payload))

    @classmethod
    def get_worker(cls):
        return None
class Metric(object):
    """
    Abstract base class for metrics: holds a type tag, a name and an optional
    context. Subclasses supply `value` and extend `serialize`.
    """

    def __init__(self, metric_type, name, context=None):
        self._type = metric_type
        self._name = name
        self._context = context

    @property
    def metric_type(self):
        return self._type

    @property
    def name(self):
        return self._name

    @property
    def context(self):
        return self._context

    @context.setter
    def context(self, new_context):
        self._context = new_context

    @property
    def value(self):
        raise NotImplementedError()

    def serialize(self):
        # We purposefully do not serialize the context as it can be large;
        # it will be transferred using a different mechanism and reset on the other
        # end.
        return dict(_name=self._name, _type=self._type)

    @classmethod
    def deserialize(cls, value):
        """
        Rebuild a metric from its serialized dict by dispatching on "_type".
        Returns None for None input; raises NotImplementedError for a type
        that has no registered class.
        """
        if value is None:
            return None
        type_tag = value.get("_type", "INVALID")
        metric_name = value.get("_name", None)
        target_cls = _str_type_to_type.get(type_tag, None)
        if not target_cls:
            raise NotImplementedError("Metric class %s is not supported" % type_tag)
        return target_cls.deserialize(metric_name, value)
class Timer(Metric):
    """Metric that records a start and an end time, both as `time.time()` values."""

    def __init__(self, name, env=None):
        super(Timer, self).__init__(TIMER_TYPE, name, env)
        self._start = 0
        self._end = 0

    def start(self, now=None):
        """Record the start time; defaults to the current time."""
        self._start = time.time() if now is None else now

    def end(self, now=None):
        """Record the end time; defaults to the current time."""
        self._end = time.time() if now is None else now

    @property
    def duration(self):
        return self._end - self._start

    @property
    def value(self):
        # duration scaled by 1000
        return self.duration * 1000

    def serialize(self):
        data = super(Timer, self).serialize()
        data["_start"] = self._start
        data["_end"] = self._end
        return data

    @classmethod
    def deserialize(cls, metric_name, value):
        restored = Timer(metric_name)
        restored.start(value.get("_start", 0))
        restored.end(value.get("_end", 0))
        return restored
class Counter(Metric):
    """Metric holding an integer count that starts at zero."""

    def __init__(self, name, env=None):
        super(Counter, self).__init__(COUNTER_TYPE, name, env)
        self._count = 0

    def increment(self):
        self._count = self._count + 1

    def set_count(self, count):
        self._count = count

    @property
    def value(self):
        return self._count

    def serialize(self):
        data = super(Counter, self).serialize()
        data["_count"] = self._count
        return data

    @classmethod
    def deserialize(cls, metric_name, value):
        restored = Counter(metric_name)
        restored.set_count(value.get("_count", 0))
        return restored
class Gauge(Metric):
    """Metric holding a single value that can be set directly or incremented."""

    def __init__(self, name, env=None):
        super(Gauge, self).__init__(GAUGE_TYPE, name, env)
        self._value = 0

    def set_value(self, val):
        self._value = val

    def increment(self):
        self._value = self._value + 1

    @property
    def value(self):
        return self._value

    def serialize(self):
        data = super(Gauge, self).serialize()
        data["_value"] = self._value
        return data

    @classmethod
    def deserialize(cls, metric_name, value):
        restored = Gauge(metric_name)
        restored.set_value(value.get("_value", 0))
        return restored
# Dispatch table used by Metric.deserialize: serialized "_type" tag -> class.
_str_type_to_type = {COUNTER_TYPE: Counter, GAUGE_TYPE: Gauge, TIMER_TYPE: Timer}
================================================
FILE: metaflow/multicore_utils.py
================================================
import sys
import os
import traceback
from itertools import islice
from tempfile import NamedTemporaryFile
import time
import metaflow.tracing as tracing
from typing import (
Any,
Callable,
Iterable,
Iterator,
List,
Optional,
NoReturn,
Tuple,
TypeVar,
Union,
)
try:
# Python 2
import cPickle as pickle
except:
# Python 3
import pickle
# This module reimplements select functions from the standard
# Python multiprocessing module.
#
# Three reasons why:
#
# 1) Multiprocessing has open bugs, e.g. https://bugs.python.org/issue29759
# 2) Work around limits, like the 32MB object limit in Queue, without
# introducing an external dependency like joblib.
# 3) Supports closures and lambdas in contrast to multiprocessing.
class MulticoreException(Exception):
    """Raised by the parallel helpers in this module when a child process fails."""
_A = TypeVar("_A")
_R = TypeVar("_R")
def _spawn(
    func: Callable[[_A], _R], arg: _A, dir: Optional[str]
) -> Union[Tuple[int, str], NoReturn]:
    """
    Fork a child process that computes `func(arg)` and pickles the result
    into a temporary file.

    Parameters
    ----------
    func : Callable
        Function to run in the child.
    arg : Any
        The single argument passed to `func`.
    dir : str, optional
        Directory in which the temporary result file is created.

    Returns
    -------
    Tuple[int, str]
        In the parent: the child's pid and the path of its result file.
        The child never returns; it terminates through `os._exit` with code 0
        on success and 1 on failure.
    """
    # Only the name is needed here; delete=False keeps the file on disk so
    # the child can reopen it for writing.
    with NamedTemporaryFile(prefix="parallel_map_", dir=dir, delete=False) as tmpfile:
        output_file = tmpfile.name

    # Make sure stdout and stderr are flushed before forking,
    # or else we may print multiple copies of the same output
    sys.stderr.flush()
    sys.stdout.flush()
    pid = os.fork()
    if pid:
        return pid, output_file
    else:
        # NOTE(review): if entering tracing.post_fork() itself raised, the
        # os._exit in the finally clause below would not be reached -- confirm
        # that post_fork() cannot fail.
        with tracing.post_fork():
            try:
                # Assume failure until the result has been written completely.
                exit_code = 1
                ret = func(arg)
                with open(output_file, "wb") as f:
                    pickle.dump(ret, f, protocol=pickle.HIGHEST_PROTOCOL)
                exit_code = 0
            except:
                # we must not let any exceptions escape this function
                # which might trigger unintended side-effects
                traceback.print_exc()
            finally:
                sys.stderr.flush()
                sys.stdout.flush()
                # we can't use sys.exit(0) here since it raises SystemExit
                # that may have unintended side-effects (e.g. triggering
                # finally blocks).
                os._exit(exit_code)
def parallel_imap_unordered(
    func: Callable[[_A], _R],
    iterable: Iterable[_A],
    max_parallel: Optional[int] = None,
    dir: Optional[str] = None,
) -> Iterator[_R]:
    """
    Parallelizes execution of a function using multiprocessing. The result
    order is not guaranteed.

    Parameters
    ----------
    func : Callable[[Any], Any]
        Function taking a single argument and returning a result
    iterable : Iterable[Any]
        Iterable over arguments to pass to func
    max_parallel : int, optional, default None
        Maximum parallelism. If not specified, it uses the number of CPUs
    dir : str, optional, default None
        If specified, it's the directory where temporary files are created

    Yields
    ------
    Any
        One result from calling func on one argument

    Raises
    ------
    MulticoreException
        If a child process terminates with a non-zero status.
    """
    if max_parallel is None:
        # Lazy import to save on startup time for metaflow as a whole
        from multiprocessing import cpu_count

        max_parallel = cpu_count()

    args_iter = iter(iterable)
    pids = [_spawn(func, arg, dir) for arg in islice(args_iter, max_parallel)]

    while pids:
        for idx, pid_info in enumerate(pids):
            pid, output_file = pid_info
            pid, exit_code = os.waitpid(pid, os.WNOHANG)
            if pid:
                pids.pop(idx)
                break
        else:
            time.sleep(0.1)  # Wait a bit before re-checking
            continue

        # Read the result and delete the temporary file *before* yielding, so
        # the file is neither left open nor left on disk if the consumer stops
        # iterating. The file is also removed when the child failed.
        try:
            if exit_code:
                raise MulticoreException("Child failed")
            with open(output_file, "rb") as f:
                result = pickle.load(f)
        finally:
            try:
                os.remove(output_file)
            except OSError:
                # Cleanup problems must not mask the real outcome.
                pass
        yield result

        # Keep the pool full: start one more child if arguments remain.
        arg = list(islice(args_iter, 1))
        if arg:
            pids.insert(0, _spawn(func, arg[0], dir))
def parallel_map(
    func: Callable[[_A], _R],
    iterable: Iterable[_A],
    max_parallel: Optional[int] = None,
    dir: Optional[str] = None,
) -> List[_R]:
    """
    Run `func` over every item of `iterable` in parallel child processes and
    return the results in input order.

    Parameters
    ----------
    func : Callable[[Any], Any]
        Function taking a single argument and returning a result
    iterable : Iterable[Any]
        Iterable over arguments to pass to func
    max_parallel : int, optional, default None
        Maximum parallelism. If not specified, it uses the number of CPUs
    dir : str, optional, default None
        If specified, it's the directory where temporary files are created

    Returns
    -------
    List[Any]
        Results. The items in the list are in the same order as the items
        in `iterable`.
    """

    def wrapper(arg_with_idx):
        # Tag every result with the position of its argument.
        position, item = arg_with_idx
        return position, func(item)

    tagged = list(
        parallel_imap_unordered(
            wrapper, enumerate(iterable), max_parallel=max_parallel, dir=dir
        )
    )
    # Results arrive in completion order; sorting on the tag restores input order.
    tagged.sort()
    return [outcome for _, outcome in tagged]
================================================
FILE: metaflow/package/__init__.py
================================================
import json
import os
import sys
import threading
import time
from io import BytesIO
from types import ModuleType
from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING, Type, cast
from ..debug import debug
from ..packaging_sys import ContentType, MetaflowCodeContent
from ..packaging_sys.backend import PackagingBackend
from ..packaging_sys.tar_backend import TarPackagingBackend
from ..packaging_sys.v1 import MetaflowCodeContentV1
from ..packaging_sys.utils import suffix_filter, walk
from ..metaflow_config import DEFAULT_PACKAGE_SUFFIXES
from ..exception import MetaflowException
from ..user_configs.config_parameters import dump_config_values
from .. import R
DEFAULT_SUFFIXES_LIST = DEFAULT_PACKAGE_SUFFIXES.split(",")
if TYPE_CHECKING:
import metaflow.datastore
class NonUniqueFileNameToFilePathMappingException(MetaflowException):
    """Raised when one file name in the code package maps to several file paths."""

    headline = "Non-unique file path for a file name included in code package"

    def __init__(self, filename, file_paths, lineno=None):
        template = (
            "Filename %s included in the code package includes multiple different "
            "paths for the same name : %s.\n"
            "The `filename` in the `add_to_package` decorator hook requires a unique "
            "`file_path` to `file_name` mapping"
        )
        all_paths = ", ".join(file_paths)
        super().__init__(msg=template % (filename, all_paths), lineno=lineno)
class MetaflowPackage(object):
def __init__(
    self,
    flow,
    environment,
    echo,
    suffixes: Optional[List[str]] = DEFAULT_SUFFIXES_LIST,
    user_code_filter: Optional[Callable[[str], bool]] = None,
    flow_datastore: Optional["metaflow.datastore.FlowDataStore"] = None,
    mfcontent: Optional[MetaflowCodeContent] = None,
    exclude_tl_dirs=None,
    backend: Type[PackagingBackend] = TarPackagingBackend,
):
    """
    Configure the package and start building it on a background thread.

    Parameters
    ----------
    flow
        The flow to package; may be falsy (see `_package_and_upload`).
    environment
        Environment object; `init_environment(echo)` is called on it here.
    echo : Callable
        Function used to print messages.
    suffixes : List[str], optional
        File suffixes to include in user code. They are merged with the
        default suffixes; None disables suffix filtering.
    user_code_filter : Callable[[str], bool], optional
        Extra predicate applied to user files (in addition to suffixes).
    flow_datastore : FlowDataStore, optional
        If given, the finished package is uploaded to it.
    mfcontent : MetaflowCodeContent, optional
        Content provider; defaults to MetaflowCodeContentV1.
    exclude_tl_dirs : List[str], optional
        Additional top-level directories to exclude from user code.
    backend : Type[PackagingBackend]
        Archive backend class used to build the package.
    """
    self._environment = environment
    self._environment.init_environment(echo)

    self._echo = echo
    self._flow = flow
    self._flow_datastore = flow_datastore
    self._backend = backend

    # Info about the package
    self._name = None
    self._create_time = time.time()
    self._user_flow_dir = None

    # Content of the package (and settings on how to create it)
    if suffixes is not None:
        self._suffixes = list(set().union(suffixes, DEFAULT_SUFFIXES_LIST))
    else:
        self._suffixes = None

    def _module_selector(m) -> bool:
        from ..user_decorators.user_flow_decorator import FlowMutatorMeta
        from ..user_decorators.user_step_decorator import UserStepDecoratorMeta

        # Be very defensive here to filter modules in case there are
        # some badly behaved modules that have weird values for
        # METAFLOW_PACKAGE_POLICY for example.
        try:
            if (
                m.__name__ in FlowMutatorMeta._import_modules
                or m.__name__ in UserStepDecoratorMeta._import_modules
                or (
                    hasattr(m, "METAFLOW_PACKAGE_POLICY")
                    and m.METAFLOW_PACKAGE_POLICY == "include"
                )
            ):
                return True
            return False
        except:
            return False

    if mfcontent is None:
        self._mfcontent = MetaflowCodeContentV1(criteria=_module_selector)
    else:
        self._mfcontent = mfcontent
    # We exclude the environment when packaging as this will be packaged separately.
    # This comes into play primarily if packaging from a node already running packaged
    # code.
    # These directories are only excluded at the top-level (ie: not further down
    # in sub-directories)
    # "_escape_trampolines" is a special directory where trampoline escape hatch
    # files are stored (used by Netflix Extension's Conda implementation).
    self._exclude_tl_dirs = (
        self._mfcontent.get_excluded_tl_entries()
        + ["_escape_trampolines"]
        + (exclude_tl_dirs or [])
    )

    if self._suffixes is not None and user_code_filter is not None:
        # Default arguments bind both filters once, at definition time.
        self._user_code_filter = lambda x, f1=suffix_filter(
            self._suffixes
        ), f2=user_code_filter: f1(x) and f2(x)
        self._filter_type = "suffixes and user filter"
    elif self._suffixes is not None:
        self._user_code_filter = suffix_filter(self._suffixes)
        self._filter_type = "suffixes"
    elif user_code_filter is not None:
        self._user_code_filter = user_code_filter
        self._filter_type = "user filter"
    else:
        self._user_code_filter = lambda x: True
        self._filter_type = "no filter"

    # Info about the package creation (it happens async)
    self._is_package_available = None
    self._blob_sha = None
    self._blob_url = None
    self._blob = None
    # These two are filled in by the background thread. Define them up front
    # so that reading them before the thread gets that far (for example after
    # a timed-out wait) cannot raise AttributeError.
    self._packaging_exception = None
    self._cached_user_members = []

    # We launch a thread to create the package asynchronously and upload
    # it opportunistically
    self._create_thread = threading.Thread(
        target=self._package_and_upload,
        daemon=True,
    )
    self._create_thread.start()
# HORRIBLE HACK SO THAT CURRENT COMPUTE IMPLEMENTATIONS CAN STILL
# DO pkg.blob. Ideally, this goes away and blob_with_timeout becomes
# the main method (called blob).
@property
def blob(self) -> BytesIO:
    """The package blob, obtained by calling `blob_with_timeout()` with no timeout."""
    return self.blob_with_timeout()
def blob_with_timeout(self, timeout: Optional[float] = None) -> BytesIO:
if self._blob is None:
self._create_thread.join(timeout)
if self._is_package_available is not None:
# We have our result now
if self._is_package_available:
return self._blob
else:
raise self._packaging_exception
return self._blob
def package_sha(self, timeout: Optional[float] = None) -> Optional[str]:
if self._blob_sha is None:
self._create_thread.join(timeout)
if self._is_package_available is not None:
# We have our result now
if self._is_package_available:
return self._blob_sha
else:
raise self._packaging_exception
return self._blob_sha
def package_url(self, timeout: Optional[float] = None) -> Optional[str]:
if self._blob_url is None:
self._create_thread.join(timeout)
if self._is_package_available is not None:
# We have our result now
if self._is_package_available:
return self._blob_url
else:
raise self._packaging_exception
return self._blob_url
@property
def package_metadata(self):
return json.dumps(
{
"version": 0,
"archive_format": self._backend.backend_type(),
"mfcontent_version": self._mfcontent.get_package_version(),
}
)
@classmethod
def get_backend(cls, pkg_metadata: str) -> PackagingBackend:
    """
    Look up the packaging backend named in the package metadata.

    Parameters
    ----------
    pkg_metadata : str
        The metadata of the package to extract (a JSON string).

    Returns
    -------
    PackagingBackend
        The backend type that can be used to extract the package. Metadata
        without an "archive_format" entry is treated as "tgz".
    """
    metadata = json.loads(pkg_metadata)
    archive_format = metadata.get("archive_format", "tgz")
    return PackagingBackend.get_backend(archive_format)
@classmethod
def get_extract_commands(
    cls, pkg_metadata: str, archive_path: str, dest_dir: str = "."
) -> List[str]:
    """
    Build the commands that extract the package into dest_dir. The returned
    list can be passed to subprocess.run for example.

    Parameters
    ----------
    pkg_metadata : str
        The metadata of the package to extract.
    archive_path : str
        The path to the archive to extract.
    dest_dir : str, default "."
        The directory to extract the package into.

    Returns
    -------
    List[str]
        The commands needed to extract the package into the directory dest_dir.
    """
    archive_format = json.loads(pkg_metadata).get("archive_format", "tgz")
    # The backend itself knows how its archives are extracted
    extract_cmds = PackagingBackend.get_backend(archive_format).get_extract_commands(
        archive_path, dest_dir
    )
    debug.package_exec(
        f"Command to extract {archive_path} into {dest_dir}: {extract_cmds}"
    )
    return extract_cmds
@classmethod
def get_post_extract_env_vars(
    cls, pkg_metadata: str, dest_dir: str = "."
) -> Dict[str, str]:
    """
    Return the environment variables needed to access content that has been
    extracted into dest_dir. This will typically involve setting PYTHONPATH.

    Parameters
    ----------
    pkg_metadata : str
        The metadata of the package to extract.
    dest_dir : str, default "."
        The directory where the content has been extracted to.

    Returns
    -------
    Dict[str, str]
        The post-extract environment variables that are needed to access the content
        that has been extracted into dest_dir.
    """
    metadata = json.loads(pkg_metadata)
    # Metadata without a content version is treated as version 0
    content_version = metadata.get("mfcontent_version", 0)
    result = MetaflowCodeContent.get_post_extract_env_vars(content_version, dest_dir)
    debug.package_exec(
        f"Environment variables to access content extracted into {dest_dir}: {result}"
    )
    return result
@classmethod
def cls_get_content(
    cls, pkg_metadata, archive: BytesIO, name: str
) -> Optional[bytes]:
    """
    Read one member out of a package archive.

    Parameters
    ----------
    pkg_metadata : str
        The metadata of the package to extract.
    archive : BytesIO
        The archive to extract the member from.
    name : str
        The name of the member to extract.

    Returns
    -------
    Optional[bytes]
        The content of the member if it exists, None otherwise.
    """
    archive_backend = cls.get_backend(pkg_metadata)
    with archive_backend.cls_open(archive) as handle:
        member = archive_backend.cls_get_member(handle, name)
        return member
@classmethod
def cls_get_info(cls, pkg_metadata, archive: BytesIO) -> Optional[Dict[str, str]]:
    """
    Read the package info stored in an archive.

    Parameters
    ----------
    pkg_metadata : str
        The metadata of the package to extract.
    archive : BytesIO
        The archive to extract the info from.

    Returns
    -------
    Optional[Dict[str, str]]
        The info of the package if it exists, None otherwise.
    """
    archive_backend = cls.get_backend(pkg_metadata)
    with archive_backend.cls_open(archive) as handle:
        info = MetaflowCodeContent.get_archive_info(handle, archive_backend)
        return info
@classmethod
def cls_get_config(
    cls, pkg_metadata: str, archive: BytesIO
) -> Optional[Dict[str, str]]:
    """
    Read the package config stored in an archive.

    Parameters
    ----------
    pkg_metadata : str
        The metadata of the package to extract.
    archive : BytesIO
        The archive to extract the config from.

    Returns
    -------
    Optional[Dict[str, str]]
        The config of the package if it exists, None otherwise.
    """
    archive_backend = cls.get_backend(pkg_metadata)
    with archive_backend.cls_open(archive) as handle:
        config = MetaflowCodeContent.get_archive_config(handle, archive_backend)
        return config
@classmethod
def cls_extract_into(
    cls,
    pkg_metadata: str,
    archive: BytesIO,
    dest_dir: str = ".",
    content_types: int = ContentType.ALL_CONTENT.value,
):
    """
    Extract (part of) a package archive into a directory.

    Parameters
    ----------
    pkg_metadata : str
        The metadata of the package to extract.
    archive : BytesIO
        The archive to extract.
    dest_dir : str, default "."
        The directory to extract the package into.
    content_types : int, default ALL_CONTENT
        The types of content to extract. This is a bitmask of ContentType values.
    """
    archive_backend = cls.get_backend(pkg_metadata)
    with archive_backend.cls_open(archive) as handle:
        # Select the members matching the requested content types, then
        # extract exactly those.
        wanted = MetaflowCodeContent.get_archive_content_members(
            handle, content_types, archive_backend
        )
        archive_backend.cls_extract_members(handle, wanted, dest_dir)
def user_tuples(self, timeout: Optional[float] = None):
# Wait for at least the blob to be formed
_ = self.blob_with_timeout(timeout=timeout)
for path, arcname in self._cached_user_members:
yield path, arcname
def path_tuples(self, timeout: Optional[float] = None):
# Wait for at least the blob to be formed
_ = self.blob_with_timeout(timeout=timeout)
# Files included in the environment
yield from self._mfcontent.content_names()
# Files included in the user code
yield from self.user_tuples()
def show(self, timeout: Optional[float] = None) -> str:
# Human-readable content of the package
blob = self.blob_with_timeout(timeout=timeout) # Ensure the package is created
lines = [
f"Package size: {self._format_size(len(blob))}",
f"Number of files: {sum(1 for _ in self.path_tuples())}",
self._mfcontent.show(),
]
if self._flow:
lines.append(f"\nUser code in flow {self._name}:")
lines.append(f" - Packaged from directory {self._user_flow_dir}")
if self._filter_type != "no filter":
if self._suffixes:
lines.append(
f" - Filtered by suffixes: {', '.join(self._suffixes)}"
)
else:
lines.append(f" - Filtered by {self._filter_type}")
else:
lines.append(" - No user code filter applied")
if self._exclude_tl_dirs:
lines.append(
f" - Excluded directories: {', '.join(self._exclude_tl_dirs)}"
)
return "\n".join(lines)
def get_content(
    self, name: str, content_type: ContentType, timeout: Optional[float] = None
) -> Optional[bytes]:
    """
    Method to get the content of a file within the package. This method
    should be used for one-off access to small-ish files. If more files are
    needed, use extract_into to extract the package into a directory and
    then access the files from there.

    Parameters
    ----------
    name : str
        The name of the file to get the content of. Note that this
        is not necessarily the name in the archive but is the name
        that was passed in when creating the archive (in the archive,
        it may be prefixed by some directory structure).
    content_type : ContentType
        The type of file to get the content of.
    timeout : float, optional
        Maximum time in seconds to wait for the package to be created.

    Returns
    -------
    Optional[bytes]
        The content of the file. If the file is not found, None is returned.

    Raises
    ------
    ValueError
        If `content_type` is not one of the supported content types.
    """

    def _read_bytes(path):
        # Close the handle deterministically instead of leaving it to the
        # garbage collector.
        with open(path, "rb") as f:
            return f.read()

    # Wait for at least the blob to be formed
    _ = self.blob_with_timeout(timeout=timeout)
    if content_type == ContentType.USER_CONTENT:
        for path, arcname in self.user_tuples():
            if name == arcname:
                return _read_bytes(path)
        return None
    elif content_type in (
        ContentType.CODE_CONTENT,
        ContentType.MODULE_CONTENT,
        ContentType.OTHER_CONTENT,
    ):
        mangled_name = self._mfcontent.get_archive_filename(name, content_type)
        for path_or_bytes, arcname in self._mfcontent.contents(content_type):
            if mangled_name == arcname:
                if isinstance(path_or_bytes, bytes):
                    # In case this is generated content like an INFO file
                    return path_or_bytes
                # Otherwise, it is a file path
                return _read_bytes(path_or_bytes)
        return None
    raise ValueError(f"Unknown content type: {content_type}")
def extract_into(
    self,
    dest_dir: str = ".",
    content_types: int = ContentType.ALL_CONTENT.value,
    timeout: Optional[float] = None,
):
    """
    Method to extract the package (or some of the files) into a directory.
    Files are symlinked to their original location; generated content is
    written out as regular files. Existing destination paths are left alone.

    Parameters
    ----------
    dest_dir : str, default "."
        The directory to extract the package into.
    content_types : int, default ALL_CONTENT
        The types of content to extract.
    timeout : float, optional
        Maximum time in seconds to wait for the package to be created.
    """

    def _prepare(new_path):
        # Create the parent directory and report whether new_path is free.
        os.makedirs(os.path.dirname(new_path), exist_ok=True)
        if os.path.islink(new_path) and not os.path.exists(new_path):
            # A dangling symlink (e.g. left by an earlier extraction) is
            # invisible to os.path.exists(): os.symlink() would then fail
            # with FileExistsError and open(..., "wb") would write through
            # the stale link. Drop it so the path is recreated.
            os.unlink(new_path)
        return not os.path.exists(new_path)

    _ = self.blob_with_timeout(timeout=timeout)  # Ensure the package is created
    member_list = []
    if content_types & ContentType.USER_CONTENT.value:
        member_list.extend(
            [(m[0], os.path.join(dest_dir, m[1])) for m in self.user_tuples()]
        )
    if content_types & (
        ContentType.CODE_CONTENT.value | ContentType.MODULE_CONTENT.value
    ):
        # We need to get the name of the files in the content archive to extract
        member_list.extend(
            [
                (m[0], os.path.join(dest_dir, m[1]))
                for m in self._mfcontent.content_names(
                    content_types & ~ContentType.OTHER_CONTENT.value
                )
            ]
        )
    for orig_path, new_path in member_list:
        # TODO: In case there are duplicate files -- that should not be the case
        # but there is a bug currently with internal Netflix code.
        if _prepare(new_path):
            os.symlink(orig_path, new_path)
            # Could copy files as well if we want to split them out.
            # shutil.copy(orig_path, new_path)
    # OTHER_CONTENT requires special handling because sometimes the file isn't a file
    # but generated content
    member_list = []
    if content_types & ContentType.OTHER_CONTENT.value:
        member_list.extend(
            [
                (m[0], os.path.join(dest_dir, m[1]))
                for m in self._mfcontent.contents(ContentType.OTHER_CONTENT)
            ]
        )
    for path_or_content, new_path in member_list:
        if _prepare(new_path):
            if isinstance(path_or_content, bytes):
                with open(new_path, "wb") as f:
                    f.write(path_or_content)
            else:
                os.symlink(path_or_content, new_path)
@staticmethod
def _format_size(size_in_bytes):
for unit in ["B", "KB", "MB", "GB", "TB"]:
if size_in_bytes < 1024.0:
return f"{size_in_bytes:.2f} {unit}"
size_in_bytes /= 1024.0
return f"{size_in_bytes:.2f} PB"
def _package_and_upload(self):
    """
    Thread target: build the package blob and, when a flow datastore was
    given, upload it.

    The outcome is recorded in `_is_package_available` (True/False). On
    failure the exception is stored in `_packaging_exception` for the
    accessor methods to re-raise.
    """

    def _flow_label():
        # The package may be built without a flow, so never dereference
        # self._flow.name unconditionally in messages.
        return getattr(self._flow, "name", None) if self._flow else "<no flow>"

    try:
        # Can be called without a flow (Function)
        if self._flow:
            for step in self._flow:
                for deco in step.decorators:
                    deco.package_init(self._flow, step.__name__, self._environment)
            self._name = f"flow {self._flow.name}"
        else:
            self._name = ""

        # Add metacontent
        self._mfcontent.add_info(
            self._environment.get_environment_info(include_ext_info=True)
        )

        self._mfcontent.add_config(dump_config_values(self._flow))

        # Add user files (from decorators and environment)
        if self._flow:
            self._add_addl_files()
        # NOTE(review): the member list is built unconditionally so that
        # _make() always finds it, including when there is no flow -- confirm
        # this matches the intended nesting of the original code.
        self._cached_user_members = list(self._user_code_tuples())
        debug.package_exec(
            f"User files to package: {self._cached_user_members}"
        )

        self._blob = self._make()
        if self._flow_datastore:
            if len(self._blob) > 100 * 1024 * 1024:
                self._echo(
                    f"Warning: The code package for {_flow_label()} is larger than "
                    f"100MB (found it to be {self._format_size(len(self._blob))}) "
                    "This may lead to slower upload times for remote runs and no "
                    "uploads for local runs. Consider reducing the package size. "
                    "Use ` package info` or ` package list` "
                    "to get more information about what is included in the package."
                )
            self._blob_url, self._blob_sha = self._flow_datastore.save_data(
                [self._blob], len_hint=1
            )[0]
        else:
            self._blob_url = self._blob_sha = ""
        self._is_package_available = True
    except Exception as e:
        # Record the failure before reporting it, so that a problem while
        # printing cannot leave waiters without a result.
        self._packaging_exception = e
        self._is_package_available = False
        self._echo(f"Package creation/upload failed for {_flow_label()}: {e}")
def _add_addl_files(self):
    """
    Register with `_mfcontent` the extra files and modules requested through
    `add_to_package()` by every step decorator and by the environment.
    """
    # Look at all decorators that provide additional files
    # file name -> file path seen so far, used to detect conflicting mappings
    deco_module_paths = {}
    # modules that have already been added
    addl_modules = set()

    def _check_tuple(path_tuple):
        # Normalize and validate one entry. Returns a 3-tuple
        # (path_or_module, name, type), or None if the entry is a module
        # that was already added.
        if len(path_tuple) == 2:
            # Two-element entries are treated as code files
            path_tuple = (
                path_tuple[0],
                path_tuple[1],
                ContentType.CODE_CONTENT,
            )
        file_path, file_name, file_type = path_tuple
        if file_type == ContentType.MODULE_CONTENT:
            if file_path in addl_modules:
                return None  # Module was already added -- we don't add twice
            addl_modules.add(file_path)
        elif file_type in (
            ContentType.OTHER_CONTENT,
            ContentType.CODE_CONTENT,
        ):
            path_tuple = (os.path.realpath(path_tuple[0]), path_tuple[1], file_type)
            # These are files
            # Check if the path is not duplicated as
            # many steps can have the same packages being imported
            # NOTE(review): the uniqueness check below compares the path as
            # given (file_path), not the realpath stored in path_tuple --
            # confirm that is intended.
            if file_name not in deco_module_paths:
                deco_module_paths[file_name] = file_path
            elif deco_module_paths[file_name] != file_path:
                raise NonUniqueFileNameToFilePathMappingException(
                    file_name, [deco_module_paths[file_name], file_path]
                )
        else:
            raise ValueError(f"Unknown file type: {file_type}")
        return path_tuple

    def _add_tuple(path_tuple):
        # Hand one validated entry to the content provider, by type
        file_path, file_name, file_type = path_tuple
        if file_type == ContentType.MODULE_CONTENT:
            # file_path is actually a module
            self._mfcontent.add_module(cast(ModuleType, file_path))
        elif file_type == ContentType.CODE_CONTENT:
            self._mfcontent.add_code_file(file_path, file_name)
        elif file_type == ContentType.OTHER_CONTENT:
            self._mfcontent.add_other_file(file_path, file_name)

    for step in self._flow:
        for deco in step.decorators:
            for path_tuple in deco.add_to_package():
                path_tuple = _check_tuple(path_tuple)
                if path_tuple is None:
                    continue
                _add_tuple(path_tuple)

    # the package folders for environment
    for path_tuple in self._environment.add_to_package():
        path_tuple = _check_tuple(path_tuple)
        if path_tuple is None:
            continue
        _add_tuple(path_tuple)
def _user_code_tuples(self):
    """
    Yield, one by one, the entries describing the user's own files.

    Also records the directory being packaged in ``self._user_flow_dir``.
    For R flows this is the R working directory, and the R package paths are
    yielded after the working-directory files. Otherwise it is the directory
    of the script in ``sys.argv[0]``, walked while excluding the top-level
    directories listed in ``self._exclude_tl_dirs``.
    """
    if not R.use_r():
        # the user's working directory
        script_dir = os.path.dirname(os.path.abspath(sys.argv[0])) + "/"
        self._user_flow_dir = script_dir
        # TODO: This is where we will check if the file is already included
        # in the mfcontent portion
        yield from walk(
            script_dir,
            file_filter=self._user_code_filter,
            exclude_tl_dirs=self._exclude_tl_dirs,
        )
        return

    # the R working directory
    self._user_flow_dir = R.working_dir()
    yield from walk("%s/" % R.working_dir(), file_filter=self._user_code_filter)
    # the R package
    yield from R.package_paths()
def _make(self):
backend = self._backend()
with backend.create() as archive:
# Package the environment
for path_or_bytes, arcname in self._mfcontent.contents():
if isinstance(path_or_bytes, str):
archive.add_file(path_or_bytes, arcname=arcname)
else:
archive.add_data(BytesIO(path_or_bytes), arcname=arcname)
# Package the user code
for path, arcname in self._cached_user_members:
archive.add_file(path, arcname=arcname)
return backend.get_blob()
def __str__(self):
    # Human-readable representation of the package.
    # NOTE(review): the f-string below is empty in this copy of the file, so
    # str() of the package is "". This looks like text lost during
    # extraction (e.g. an angle-bracketed description) -- confirm against
    # the original source before relying on it.
    return f""
================================================
FILE: metaflow/packaging_sys/__init__.py
================================================
import json
import os
from enum import IntEnum
from types import ModuleType
from typing import (
Any,
Dict,
Generator,
List,
Optional,
TYPE_CHECKING,
Tuple,
Type,
Union,
)
from metaflow.packaging_sys.distribution_support import PackagedDistributionFinder
from .backend import PackagingBackend
from .tar_backend import TarPackagingBackend
from ..util import get_metaflow_root
MFCONTENT_MARKER = ".mf_install"
if TYPE_CHECKING:
import metaflow.extension_support.metadata
class ContentType(IntEnum):
    """
    Bit flags classifying what a file added to a package is.

    Each category occupies its own bit so several categories can be OR-ed
    together into an integer mask; ALL_CONTENT is the mask with every
    category set.
    """

    # File being added is user code (ie: the directory with the flow file)
    USER_CONTENT = 1 << 0
    # File being added is non-user code (libraries, metaflow itself, ...)
    CODE_CONTENT = 1 << 1
    # File being added is a python module
    MODULE_CONTENT = 1 << 2
    # File being added is a non-python file
    OTHER_CONTENT = 1 << 3
    ALL_CONTENT = USER_CONTENT | CODE_CONTENT | MODULE_CONTENT | OTHER_CONTENT
class MetaflowCodeContent:
    """
    Base class for all Metaflow code packages (non user code).

    A Metaflow code package, at a minimum, contains:
      - a special INFO file (containing a bunch of metadata about the Metaflow
        environment)
      - a special CONFIG file (containing user configurations for the flow)

    Declare all other MetaflowCodeContent subclasses (versions) here to handle just
    the functions that are not implemented here. In a *separate* file, declare any
    other function for that specific version.

    NOTE: This file must remain as dependency-free as possible as it is loaded *very*
    early on. This is why you must declare a *separate* class implementing what you
    want the Metaflow code package (non user) to do.
    """

    # Cache of the parsed MFCONTENT_MARKER content. Keys are the target directory
    # (or "_local") for extracted packages and id(archive) for archives.
    _cached_mfcontent_info = {}
    # version_id -> class handling that version; filled in by __init_subclass__
    _mappings = {}

    @classmethod
    def get_info(cls) -> Optional[Dict[str, Any]]:
        """
        Get the content of the special INFO file on the local filesystem after
        the code package has been expanded.

        Returns
        -------
        Optional[Dict[str, Any]]
            The content of the INFO file -- None if there is no such file.
        """
        mfcontent_info = cls._extract_mfcontent_info()
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_info_impl(mfcontent_info)

    @classmethod
    def get_config(cls) -> Optional[Dict[str, Any]]:
        """
        Get the content of the special CONFIG file on the local filesystem after
        the code package has been expanded.

        Returns
        -------
        Optional[Dict[str, Any]]
            The content of the CONFIG file -- None if there is no such file.
        """
        mfcontent_info = cls._extract_mfcontent_info()
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_config_impl(mfcontent_info)

    @classmethod
    def get_filename(cls, filename: str, content_type: ContentType) -> Optional[str]:
        """
        Get the path to a file extracted from the archive. The filename is the filename
        passed in when creating the archive and content_type is the type of the content.

        This function will return the local path where the file can be found after
        the package has been extracted.

        Parameters
        ----------
        filename: str
            The name of the file on the filesystem.
        content_type: ContentType
            The type of the content (e.g., code, other, etc.).

        Returns
        -------
        Optional[str]
            The path to the file on the local filesystem or None if not found.
        """
        mfcontent_info = cls._extract_mfcontent_info()
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_filename_impl(mfcontent_info, filename, content_type)

    @classmethod
    def get_env_vars_for_packaged_metaflow(cls, dest_dir: str) -> Dict[str, str]:
        """
        Get the environment variables that are needed to run Metaflow when it is
        packaged. This is typically used to set the PYTHONPATH to include the
        directory where the Metaflow code package has been extracted.

        Parameters
        ----------
        dest_dir: str
            The directory in which to look for the MFCONTENT_MARKER file.

        Returns
        -------
        Dict[str, str]
            The environment variables that are needed to run Metaflow when it is
            packaged. Empty if no MFCONTENT_MARKER file is present in dest_dir.
        """
        mfcontent_info = cls._extract_mfcontent_info(dest_dir)
        if mfcontent_info is None:
            # No MFCONTENT_MARKER file found -- this is not a packaged Metaflow code
            # package so no environment variables to set.
            return {}
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        v = handling_cls.get_post_extract_env_vars_impl(dest_dir)
        # NOTE(review): the trailing ":" in the key is kept exactly as-is; it is
        # presumably a marker interpreted by whoever consumes these variables
        # (_extract_mfcontent_info reads METAFLOW_EXTRACTED_ROOT without it).
        # Confirm with the consumer before changing.
        v["METAFLOW_EXTRACTED_ROOT:"] = dest_dir
        return v

    @classmethod
    def get_archive_info(
        cls,
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Get the content of the special INFO file in the archive.

        Parameters
        ----------
        archive: Any
            The archive to read from.
        packaging_backend: Type[PackagingBackend], default TarPackagingBackend
            The packaging backend to use.

        Returns
        -------
        Optional[Dict[str, Any]]
            The content of the INFO file -- None if there is no such file.
        """
        mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_archive_info_impl(
            mfcontent_info, archive, packaging_backend
        )

    @classmethod
    def get_archive_config(
        cls,
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Get the content of the special CONFIG file in the archive.

        Parameters
        ----------
        archive: Any
            The archive to read from.
        packaging_backend: Type[PackagingBackend], default TarPackagingBackend
            The packaging backend to use.

        Returns
        -------
        Optional[Dict[str, Any]]
            The content of the CONFIG file -- None if there is no such file.
        """
        mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_archive_config_impl(
            mfcontent_info, archive, packaging_backend
        )

    @classmethod
    def get_archive_filename(
        cls,
        archive: Any,
        filename: str,
        content_type: ContentType,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[str]:
        """
        Get the filename of the archive. This does not do any extraction but simply
        returns where, in the archive, the file is located. This is the equivalent of
        get_filename but for files not extracted yet.

        Parameters
        ----------
        archive: Any
            The archive to get the filename from.
        filename: str
            The name of the file in the archive.
        content_type: ContentType
            The type of the content (e.g., code, other, etc.).
        packaging_backend: Type[PackagingBackend], default TarPackagingBackend
            The packaging backend to use.

        Returns
        -------
        Optional[str]
            The filename of the archive or None if not found.
        """
        mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_archive_filename_impl(
            mfcontent_info, archive, filename, content_type, packaging_backend
        )

    @classmethod
    def get_archive_content_members(
        cls,
        archive: Any,
        content_types: Optional[int] = None,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> List[Any]:
        """
        Get the members of the archive that match the requested content types.

        Parameters
        ----------
        archive: Any
            The archive to list the members of.
        content_types: Optional[int], default None
            Bitwise OR of ContentType values to select. If None, all content is
            returned.
        packaging_backend: Type[PackagingBackend], default TarPackagingBackend
            The packaging backend to use.

        Returns
        -------
        List[Any]
            The matching members, as returned by the packaging backend.
        """
        if content_types is None:
            # The implementations test the mask with `&`, which raises a
            # TypeError on None; None means "everything".
            content_types = ContentType.ALL_CONTENT.value
        mfcontent_info = cls._extract_archive_mfcontent_info(archive, packaging_backend)
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_archive_content_members_impl(
            mfcontent_info, archive, content_types, packaging_backend
        )

    @classmethod
    def get_distribution_finder(
        cls,
    ) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
        """
        Get the distribution finder for the Metaflow code package (if applicable).

        Some packages will include distribution information to "pretend" that some
        packages are actually distributions even if we just include them in the code
        package.

        Returns
        -------
        Optional["metaflow.extension_support.metadata.DistributionFinder"]
            The distribution finder for the Metaflow code package -- None if there is
            no such finder.
        """
        mfcontent_info = cls._extract_mfcontent_info()
        handling_cls = cls._get_mfcontent_class(mfcontent_info)
        return handling_cls.get_distribution_finder_impl(mfcontent_info)

    @classmethod
    def get_post_extract_env_vars(
        cls, version_id: int, dest_dir: str = "."
    ) -> Dict[str, str]:
        """
        Get the post-extract environment variables that are needed to access the
        content that has been extracted into dest_dir.

        This will typically involve setting PYTHONPATH.

        Parameters
        ----------
        version_id: int
            The version of MetaflowCodeContent for this package.
        dest_dir: str, default "."
            The directory where the content has been extracted to.

        Returns
        -------
        Dict[str, str]
            The post-extract environment variables that are needed to access the
            content that has been extracted into dest_dir.

        Raises
        ------
        ValueError
            If version_id is not a registered MetaflowCodeContent version.
        """
        if version_id not in cls._mappings:
            raise ValueError(
                "Invalid package -- unknown version %s in info: %s"
                % (version_id, cls._mappings)
            )
        v = cls._mappings[version_id].get_post_extract_env_vars_impl(dest_dir)
        # NOTE(review): trailing ":" kept as-is -- see
        # get_env_vars_for_packaged_metaflow; presumably meaningful to the consumer.
        v["METAFLOW_EXTRACTED_ROOT:"] = dest_dir
        return v

    # Implement the _impl methods in the base subclass (in this file). These need to
    # happen with as few imports as possible to prevent circular dependencies.
    @classmethod
    def get_info_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        raise NotImplementedError("get_info_impl not implemented")

    @classmethod
    def get_config_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        raise NotImplementedError("get_config_impl not implemented")

    @classmethod
    def get_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        filename: str,
        content_type: ContentType,
    ) -> Optional[str]:
        raise NotImplementedError("get_filename_impl not implemented")

    @classmethod
    def get_distribution_finder_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
        raise NotImplementedError("get_distribution_finder_impl not implemented")

    @classmethod
    def get_archive_info_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        raise NotImplementedError("get_archive_info_impl not implemented")

    @classmethod
    def get_archive_config_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        raise NotImplementedError("get_archive_config_impl not implemented")

    @classmethod
    def get_archive_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        filename: str,
        content_type: ContentType,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[str]:
        raise NotImplementedError("get_archive_filename_impl not implemented")

    @classmethod
    def get_archive_content_members_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        content_types: Optional[int] = None,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> List[Any]:
        raise NotImplementedError("get_archive_content_members_impl not implemented")

    @classmethod
    def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
        raise NotImplementedError("get_post_extract_env_vars_impl not implemented")

    def __init_subclass__(cls, version_id, **kwargs) -> None:
        """
        Register the subclass as the handler for version_id.

        Raises
        ------
        ValueError
            If another class is already registered for version_id.
        """
        super().__init_subclass__(**kwargs)
        if version_id in MetaflowCodeContent._mappings:
            raise ValueError(
                "Version ID %s already exists in MetaflowCodeContent mappings "
                "-- this is a bug in Metaflow." % str(version_id)
            )
        MetaflowCodeContent._mappings[version_id] = cls
        cls._version_id = version_id

    # Implement these methods in sub-classes of the base sub-classes. These methods
    # are called later and can have more dependencies and so can live in other files.
    def get_excluded_tl_entries(self) -> List[str]:
        """
        When packaging Metaflow from within an executing Metaflow flow, we need to
        exclude the files that are inserted by this content from being packaged
        (possibly).

        Use this function to return these files or top-level directories.

        Returns
        -------
        List[str]
            Files or directories to exclude
        """
        return []

    def content_names(
        self, content_types: Optional[int] = None
    ) -> Generator[Tuple[str, str], None, None]:
        """
        Detailed list of the content of this MetaflowCodeContent. This will list all
        files (or non files -- for the INFO or CONFIG data for example) present in the
        archive.

        Parameters
        ----------
        content_types : Optional[int]
            The type of content to get the names of. If None, all content is returned.

        Yields
        ------
        Generator[Tuple[str, str], None, None]
            Path on the filesystem and the name in the archive
        """
        raise NotImplementedError("content_names not implemented")

    def contents(
        self, content_types: Optional[int] = None
    ) -> Generator[Tuple[Union[bytes, str], str], None, None]:
        """
        Very similar to content_names but returns the content of the non-files
        as well as bytes. For files, identical output as content_names

        Parameters
        ----------
        content_types : Optional[int]
            The type of content to get the content of. If None, all content is
            returned.

        Yields
        ------
        Generator[Tuple[Union[str, bytes], str], None, None]
            Content of the MF content
        """
        raise NotImplementedError("contents not implemented")

    def show(self) -> str:
        """
        Returns a more human-readable string representation of the content of this
        MetaflowCodeContent. This will not, for example, list all files but summarize
        what is included at a more high level.

        Returns
        -------
        str
            A human-readable string representation of the content of this
            MetaflowCodeContent
        """
        raise NotImplementedError("show not implemented")

    def add_info(self, info: Dict[str, Any]) -> None:
        """
        Add the content of the INFO file to the Metaflow content

        Parameters
        ----------
        info: Dict[str, Any]
            The content of the INFO file
        """
        raise NotImplementedError("add_info not implemented")

    def add_config(self, config: Dict[str, Any]) -> None:
        """
        Add the content of the CONFIG file to the Metaflow content

        Parameters
        ----------
        config: Dict[str, Any]
            The content of the CONFIG file
        """
        raise NotImplementedError("add_config not implemented")

    def add_module(self, module_path: ModuleType) -> None:
        """
        Add a python module to the Metaflow content

        Parameters
        ----------
        module_path: ModuleType
            The module to add
        """
        raise NotImplementedError("add_module not implemented")

    def add_code_file(self, file_path: str, file_name: str) -> None:
        """
        Add a code file to the Metaflow content

        Parameters
        ----------
        file_path: str
            The path to the code file to add (on the filesystem)
        file_name: str
            The path in the archive to add the code file to
        """
        raise NotImplementedError("add_code_file not implemented")

    def add_other_file(self, file_path: str, file_name: str) -> None:
        """
        Add a non-python file to the Metaflow content

        Parameters
        ----------
        file_path: str
            The path to the file to add (on the filesystem)
        file_name: str
            The path in the archive to add the file to
        """
        raise NotImplementedError("add_other_file not implemented")

    @classmethod
    def _get_mfcontent_class(
        cls, info: Optional[Dict[str, Any]]
    ) -> Type["MetaflowCodeContent"]:
        """
        Pick the class handling the package described by info.

        A missing marker (info is None) selects MetaflowCodeContentV0; otherwise
        the class registered for info["version"] is returned.

        Raises
        ------
        ValueError
            If info has no "version" key or the version is not registered.
        """
        if info is None:
            return MetaflowCodeContentV0
        if "version" not in info:
            raise ValueError("Invalid package -- missing version in info: %s" % info)
        version = info["version"]
        if version not in cls._mappings:
            raise ValueError(
                "Invalid package -- unknown version %s in info: %s" % (version, info)
            )
        return cls._mappings[version]

    @classmethod
    def _extract_archive_mfcontent_info(
        cls,
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """
        Read and cache the JSON content of the MFCONTENT_MARKER member of archive.

        Returns None when the archive has no such member or the member is empty.
        """
        # NOTE(review): the cache is keyed by id(archive); ids can be reused once an
        # archive object is garbage collected, which could return a stale entry.
        if id(archive) in cls._cached_mfcontent_info:
            return cls._cached_mfcontent_info[id(archive)]

        mfcontent_info = None  # type: Optional[Dict[str, Any]]
        # Here we need to extract the information from the archive
        if packaging_backend.cls_has_member(archive, MFCONTENT_MARKER):
            # The MFCONTENT_MARKER file is present in the archive
            # We can extract the information from it
            extracted_info = packaging_backend.cls_get_member(archive, MFCONTENT_MARKER)
            if extracted_info:
                mfcontent_info = json.loads(extracted_info)
        cls._cached_mfcontent_info[id(archive)] = mfcontent_info
        return mfcontent_info

    @classmethod
    def _extract_mfcontent_info(
        cls, target_dir: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Read and cache the JSON content of the MFCONTENT_MARKER file on disk.

        With no target_dir, the file is looked up in the directory named by the
        METAFLOW_EXTRACTED_ROOT environment variable, falling back to
        get_metaflow_root(). Returns None when the file does not exist.
        """
        target_dir = target_dir or "_local"
        if target_dir in cls._cached_mfcontent_info:
            return cls._cached_mfcontent_info[target_dir]

        mfcontent_info = None  # type: Optional[Dict[str, Any]]
        if target_dir == "_local":
            root = os.environ.get("METAFLOW_EXTRACTED_ROOT", get_metaflow_root())
        else:
            root = target_dir
        if os.path.exists(os.path.join(root, MFCONTENT_MARKER)):
            with open(os.path.join(root, MFCONTENT_MARKER), "r", encoding="utf-8") as f:
                mfcontent_info = json.load(f)
        cls._cached_mfcontent_info[target_dir] = mfcontent_info
        return mfcontent_info

    def get_package_version(self) -> int:
        """
        Get the version of MetaflowCodeContent for this package.
        """
        # _version_id is set in __init_subclass__ when the subclass is created
        return self._version_id
class MetaflowCodeContentV0(MetaflowCodeContent, version_id=0):
    """
    Handler for packages without a MFCONTENT_MARKER file: everything sits directly
    under the root (INFO, CONFIG, metaflow/, ...), next to the user code.
    """

    @classmethod
    def get_info_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """Return the parsed INFO file under get_metaflow_root() or None if absent."""
        path_to_file = os.path.join(get_metaflow_root(), "INFO")
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    @classmethod
    def get_config_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """Return the parsed CONFIG file under get_metaflow_root() or None if absent."""
        path_to_file = os.path.join(get_metaflow_root(), "CONFIG")
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    @classmethod
    def get_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        filename: str,
        content_type: ContentType,
    ) -> Optional[str]:
        """
        For V0, the filename is simply the filename passed in.
        """
        path_to_file = os.path.join(get_metaflow_root(), filename)
        if os.path.isfile(path_to_file):
            return path_to_file
        return None

    @classmethod
    def get_distribution_finder_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
        """V0 packages carry no distribution information."""
        return None

    @classmethod
    def get_archive_info_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """Return the parsed "INFO" member of the archive or None if absent/empty."""
        info_content = packaging_backend.cls_get_member(archive, "INFO")
        if info_content:
            return json.loads(info_content)
        return None

    @classmethod
    def get_archive_config_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """Return the parsed "CONFIG" member of the archive or None if absent/empty."""
        config_content = packaging_backend.cls_get_member(archive, "CONFIG")
        if config_content:
            return json.loads(config_content)
        return None

    @classmethod
    def get_archive_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        filename: str,
        content_type: ContentType,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[str]:
        """Return filename if the archive has a member with that name, else None."""
        if packaging_backend.cls_has_member(archive, filename):
            # The file is present in the archive
            return filename
        return None

    @classmethod
    def get_archive_content_members_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        content_types: Optional[int] = None,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> List[Any]:
        """
        For V0, we use a static list of known files to classify the content

        Parameters
        ----------
        content_types: Optional[int], default None
            Bitwise OR of ContentType values to select; None selects everything.

        Returns
        -------
        List[Any]
            The archive members whose classification is part of content_types.
        """
        if content_types is None:
            # None means "all content"; the mask is tested with `&` below
            content_types = ContentType.ALL_CONTENT.value

        # Keys ending with "/" are directory prefixes, the others exact file names
        known_prefixes = {
            "metaflow/": ContentType.CODE_CONTENT.value,
            "metaflow_extensions/": ContentType.CODE_CONTENT.value,
            "INFO": ContentType.OTHER_CONTENT.value,
            "CONFIG": ContentType.OTHER_CONTENT.value,
            "conda.manifest": ContentType.OTHER_CONTENT.value,
            "uv.lock": ContentType.OTHER_CONTENT.value,
            "pyproject.toml": ContentType.OTHER_CONTENT.value,
            # Used in nflx-metaflow-extensions
            "condav2-1.cnd": ContentType.OTHER_CONTENT.value,
        }
        to_return = []
        for member in packaging_backend.cls_list_members(archive):
            filename = packaging_backend.cls_member_name(member)
            # True as soon as the member matches a known entry, whether or not
            # its classification was requested -- it is then never user content.
            added = False
            for prefix, classification in known_prefixes.items():
                if (
                    prefix[-1] == "/" and filename.startswith(prefix)
                ) or prefix == filename:
                    if content_types & classification:
                        to_return.append(member)
                    added = True
                    break
            if not added and content_types & ContentType.USER_CONTENT.value:
                # Everything else is user content
                to_return.append(member)
        return to_return

    @classmethod
    def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
        """For V0, the extraction directory itself goes on the PYTHONPATH."""
        return {"PYTHONPATH": dest_dir}

    def get_excluded_tl_entries(self) -> List[str]:
        """
        When packaging Metaflow from within an executing Metaflow flow, we need to
        exclude the files that are inserted by this content from being packaged
        (possibly).

        Use this function to return these files or top-level directories.

        Returns
        -------
        List[str]
            Files or directories to exclude
        """
        return ["CONFIG", "INFO"]

    # Other non-implemented methods are OK not being implemented as they will never
    # be called as they are only used when creating the package and we are starting
    # with V1.
class MetaflowCodeContentV1Base(MetaflowCodeContent, version_id=1):
    """
    Dependency-light part of the version 1 package layout: non-user code lives
    under ``_code_dir`` and metadata files (INFO, CONFIG, DIST_INFO) under
    ``_other_dir``.
    """

    _code_dir = ".mf_code"
    _other_dir = ".mf_meta"
    _info_file = "INFO"
    _config_file = "CONFIG"
    _dist_info_file = "DIST_INFO"

    def __init_subclass__(cls, **kwargs) -> None:
        # Important to add this here to prevent the subclass of
        # MetaflowCodeContentV1Base from also calling __init_subclass__ in
        # MetaflowCodeContent (which would create a problem)
        return None

    def __init__(self, code_dir: str, other_dir: str) -> None:
        # Instance-level overrides; the classmethods below keep using the
        # class-level values.
        self._code_dir = code_dir
        self._other_dir = other_dir

    @classmethod
    def _get_otherfile_path(
        cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
    ) -> str:
        """
        Path of a metadata file: relative to the archive root when in_archive is
        True, otherwise on the filesystem relative to get_metaflow_root().
        """
        if in_archive:
            return os.path.join(cls._other_dir, filename)
        return os.path.join(get_metaflow_root(), "..", cls._other_dir, filename)

    @classmethod
    def _get_codefile_path(
        cls, mfcontent_info: Optional[Dict[str, Any]], filename: str, in_archive: bool
    ) -> str:
        """
        Path of a code file: relative to the archive root when in_archive is True,
        otherwise on the filesystem under get_metaflow_root().
        """
        if in_archive:
            return os.path.join(cls._code_dir, filename)
        return os.path.join(get_metaflow_root(), filename)

    @classmethod
    def get_info_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """Return the parsed INFO file from the filesystem or None if absent."""
        path_to_file = cls._get_otherfile_path(
            mfcontent_info, cls._info_file, in_archive=False
        )
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    @classmethod
    def get_config_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """Return the parsed CONFIG file from the filesystem or None if absent."""
        path_to_file = cls._get_otherfile_path(
            mfcontent_info, cls._config_file, in_archive=False
        )
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    @classmethod
    def get_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        filename: str,
        content_type: ContentType,
    ) -> Optional[str]:
        """
        Return the filesystem path of an extracted file or None if it does not exist.

        Raises
        ------
        ValueError
            If content_type is not code, other or module content.
        """
        if content_type == ContentType.CODE_CONTENT:
            path_to_file = cls._get_codefile_path(
                mfcontent_info, filename, in_archive=False
            )
        elif content_type in (ContentType.OTHER_CONTENT, ContentType.MODULE_CONTENT):
            path_to_file = cls._get_otherfile_path(
                mfcontent_info, filename, in_archive=False
            )
        else:
            raise ValueError(
                f"Invalid content type {content_type} for filename {filename}"
            )
        if os.path.isfile(path_to_file):
            return path_to_file
        return None

    @classmethod
    def get_distribution_finder_impl(
        cls, mfcontent_info: Optional[Dict[str, Any]]
    ) -> Optional["metaflow.extension_support.metadata.DistributionFinder"]:
        """
        Return a PackagedDistributionFinder built from the DIST_INFO file or None
        if that file does not exist.
        """
        path_to_file = cls._get_otherfile_path(
            mfcontent_info, cls._dist_info_file, in_archive=False
        )
        if os.path.isfile(path_to_file):
            with open(path_to_file, "r", encoding="utf-8") as f:
                return PackagedDistributionFinder(json.load(f))
        return None

    @classmethod
    def get_archive_info_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """Return the parsed INFO member of the archive or None if absent/empty."""
        info_file = packaging_backend.cls_get_member(
            archive,
            cls._get_otherfile_path(mfcontent_info, cls._info_file, in_archive=True),
        )
        if info_file:
            return json.loads(info_file)
        return None

    @classmethod
    def get_archive_config_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[Dict[str, Any]]:
        """Return the parsed CONFIG member of the archive or None if absent/empty."""
        config_file = packaging_backend.cls_get_member(
            archive,
            cls._get_otherfile_path(mfcontent_info, cls._config_file, in_archive=True),
        )
        if config_file:
            return json.loads(config_file)
        return None

    @classmethod
    def get_archive_filename_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        filename: str,
        content_type: ContentType,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> Optional[str]:
        """
        Return where, in the archive, the file is located or None if the archive
        has no such member.

        Raises
        ------
        ValueError
            If content_type is not code, other or module content.
        """
        # The lookup is made against the archive, so the archive-relative path
        # (in_archive=True) is needed -- not the path on the local filesystem.
        if content_type == ContentType.CODE_CONTENT:
            path_to_file = cls._get_codefile_path(
                mfcontent_info, filename, in_archive=True
            )
        elif content_type in (ContentType.OTHER_CONTENT, ContentType.MODULE_CONTENT):
            path_to_file = cls._get_otherfile_path(
                mfcontent_info, filename, in_archive=True
            )
        else:
            raise ValueError(
                f"Invalid content type {content_type} for filename {filename}"
            )
        if packaging_backend.cls_has_member(archive, path_to_file):
            # The file is present in the archive
            return path_to_file
        return None

    @classmethod
    def get_archive_content_members_impl(
        cls,
        mfcontent_info: Optional[Dict[str, Any]],
        archive: Any,
        content_types: Optional[int] = None,
        packaging_backend: Type[PackagingBackend] = TarPackagingBackend,
    ) -> List[Any]:
        """
        Return the archive members whose classification is part of content_types.

        Parameters
        ----------
        content_types: Optional[int], default None
            Bitwise OR of ContentType values to select; None selects everything.
        """
        if content_types is None:
            # None means "all content"; the mask is tested with `&` below
            content_types = ContentType.ALL_CONTENT.value

        to_return = []
        module_content = set((mfcontent_info or {}).get("module_files", []))
        for member in packaging_backend.cls_list_members(archive):
            filename = packaging_backend.cls_member_name(member)
            if filename.startswith(cls._other_dir):
                # Files under the metadata directory are "other" content and must
                # never fall through to the user-content branch, even when other
                # content is not requested.
                if content_types & ContentType.OTHER_CONTENT.value:
                    to_return.append(member)
            elif filename.startswith(cls._code_dir):
                # Special case for marker which is a other content even if in code.
                # NOTE(review): with the class defaults, a name starting with
                # _code_dir cannot equal MFCONTENT_MARKER, so this branch looks
                # unreachable; kept unchanged -- confirm where the marker lives.
                if filename == MFCONTENT_MARKER:
                    if content_types & ContentType.OTHER_CONTENT.value:
                        to_return.append(member)
                    else:
                        continue
                # Here it is either module or code
                # NOTE(review): filename already starts with _code_dir here, so the
                # joined path has the prefix twice; verify against how
                # "module_files" is written.
                if os.path.join(cls._code_dir, filename) in module_content:
                    if content_types & ContentType.MODULE_CONTENT.value:
                        to_return.append(member)
                elif content_types & ContentType.CODE_CONTENT.value:
                    to_return.append(member)
            else:
                if content_types & ContentType.USER_CONTENT.value:
                    # Everything else is user content
                    to_return.append(member)
        return to_return

    @classmethod
    def get_post_extract_env_vars_impl(cls, dest_dir: str) -> Dict[str, str]:
        """For V1, the code directory inside dest_dir goes on the PYTHONPATH."""
        return {"PYTHONPATH": f"{dest_dir}/{cls._code_dir}"}
================================================
FILE: metaflow/packaging_sys/backend.py
================================================
from abc import ABC, abstractmethod
from io import BytesIO
from typing import Any, IO, List, Optional, Union
class PackagingBackend(ABC):
    """
    Abstract interface of an archive format used to build and read packages.

    Every subclass is registered under its ``type`` attribute when it is
    defined and can be retrieved with ``get_backend``. The ``cls_*`` methods
    operate on an archive object passed in; the instance methods operate on
    the archive held in ``self._archive``.
    """

    # type name -> backend class, shared by all subclasses
    _mappings = {}
    type = "none"

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        registry = cls._mappings
        if cls.type in registry:
            raise ValueError(f"PackagingBackend {cls.type} already exists")
        registry[cls.type] = cls

    @classmethod
    def get_backend(cls, name: str) -> "PackagingBackend":
        """Return the backend class registered under name (ValueError if none)."""
        if name in cls._mappings:
            return cls._mappings[name]
        raise ValueError(f"PackagingBackend {name} not found")

    @classmethod
    def backend_type(cls) -> str:
        """Return the type name of this backend."""
        return cls.type

    @classmethod
    @abstractmethod
    def get_extract_commands(cls, archive_name: str, dest_dir: str) -> List[str]:
        ...

    def __init__(self):
        # No archive until the backend sets one
        self._archive = None

    @abstractmethod
    def create(self) -> "PackagingBackend":
        ...

    @abstractmethod
    def add_file(self, filename: str, arcname: Optional[str] = None):
        ...

    @abstractmethod
    def add_data(self, data: BytesIO, arcname: str):
        ...

    @abstractmethod
    def close(self):
        ...

    @abstractmethod
    def get_blob(self) -> Optional[Union[bytes, bytearray]]:
        ...

    @classmethod
    @abstractmethod
    def cls_open(cls, content: IO[bytes]) -> Any:
        """Open the archive from the given content."""

    @classmethod
    @abstractmethod
    def cls_member_name(cls, member: Union[Any, str]) -> str:
        """
        Returns the name of the member as a string.
        This is used to ensure consistent naming across different archive formats.
        """

    @classmethod
    @abstractmethod
    def cls_has_member(cls, archive: Any, name: str) -> bool:
        ...

    @classmethod
    @abstractmethod
    def cls_get_member(cls, archive: Any, name: str) -> Optional[bytes]:
        ...

    @classmethod
    @abstractmethod
    def cls_extract_members(
        cls,
        archive: Any,
        members: Optional[List[Any]] = None,
        dest_dir: str = ".",
    ) -> None:
        ...

    @classmethod
    @abstractmethod
    def cls_list_names(cls, archive: Any) -> Optional[List[str]]:
        ...

    @classmethod
    @abstractmethod
    def cls_list_members(cls, archive: Any) -> Optional[List[Any]]:
        """List all members in the archive."""

    # The instance helpers below refuse to work while self._archive is falsy.

    def has_member(self, name: str) -> bool:
        if not self._archive:
            raise ValueError("Cannot check for member in an uncreated archive")
        return self.cls_has_member(self._archive, name)

    def get_member(self, name: str) -> Optional[bytes]:
        if not self._archive:
            raise ValueError("Cannot get member from an uncreated archive")
        return self.cls_get_member(self._archive, name)

    def extract_members(
        self, members: Optional[List[Any]] = None, dest_dir: str = "."
    ) -> None:
        if not self._archive:
            raise ValueError("Cannot extract from an uncreated archive")
        self.cls_extract_members(self._archive, members, dest_dir)

    def list_names(self) -> Optional[List[str]]:
        if not self._archive:
            raise ValueError("Cannot list names from an uncreated archive")
        return self.cls_list_names(self._archive)

    def __enter__(self):
        # Entering the context creates the archive ...
        self.create()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # ... and leaving it closes the archive, whether or not an error occurred
        self.close()
================================================
FILE: metaflow/packaging_sys/distribution_support.py
================================================
# Support saving of distribution information so we can give it back to users even
# if we do not install those distributions. This is used to package distributions in
# the MetaflowCodeContent package and provide an experience as if the packages were installed
# system-wide.
import os
import re
import sys
from pathlib import Path
from types import ModuleType
from typing import (
Callable,
Dict,
List,
Mapping,
NamedTuple,
Optional,
Set,
TYPE_CHECKING,
Union,
cast,
)
import inspect
from collections import defaultdict
from ..extension_support import metadata
from ..util import get_metaflow_root
if TYPE_CHECKING:
import pathlib
# Filled in lazily by modules_to_distributions()
_cached_distributions = None

# Callable returning the mapping of top-level packages to distribution names;
# bound below depending on the Python version.
packages_distributions = None  # type: Optional[Callable[[], Mapping[str, List[str]]]]

name_normalizer = re.compile(r"[-_.]+")

if sys.version_info[:2] < (3, 10):
    # This is the code present in 3.10+ -- we replicate here for other versions
    def _packages_distributions() -> Mapping[str, List[str]]:
        """
        Return a mapping of top-level packages to their
        distributions.
        """
        by_package = defaultdict(list)
        for dist in metadata.distributions():
            # Prefer what the distribution declares; infer from its files otherwise
            top_levels = _top_level_declared(dist) or _top_level_inferred(dist)
            for pkg in top_levels:
                by_package[pkg].append(dist.metadata["Name"])
        return dict(by_package)

    def _top_level_declared(dist: metadata.Distribution) -> List[str]:
        declared = dist.read_text("top_level.txt")
        return (declared if declared else "").split()

    def _topmost(name: "pathlib.PurePosixPath") -> Optional[str]:
        """
        Return the top-most parent as long as there is a parent.
        """
        head, *tail = name.parts
        if tail:
            return head
        return None

    def _get_toplevel_name(name: "pathlib.PurePosixPath") -> str:
        parent = _topmost(name)
        if parent:
            return parent
        # python/typeshed#10328
        return inspect.getmodulename(name) or str(name)  # type: ignore

    def _top_level_inferred(dist: "metadata.Distribution"):
        candidates = {_get_toplevel_name(f) for f in (dist.files or [])}
        # Only names without a dot are importable
        return filter(lambda candidate: "." not in candidate, candidates)

    packages_distributions = _packages_distributions
else:
    packages_distributions = metadata.packages_distributions
def modules_to_distributions() -> Dict[str, List[metadata.Distribution]]:
    """
    Return a mapping of top-level modules to their distributions.

    The mapping is computed on the first call and cached in the module-level
    `_cached_distributions`; later calls return the cached object.

    Returns
    -------
    Dict[str, List[metadata.Distribution]]
        A mapping of top-level modules to their distributions.
    """
    global _cached_distributions
    if _cached_distributions is not None:
        return _cached_distributions

    list_packages = cast(Callable[[], Mapping[str, List[str]]], packages_distributions)
    resolved = {}
    for module_name, dist_names in list_packages().items():
        resolved[module_name] = [
            metadata.distribution(dist_name) for dist_name in dist_names
        ]
    # Only publish the cache once the whole mapping was built successfully.
    _cached_distributions = resolved
    return _cached_distributions
_ModuleInfo = NamedTuple(
"_ModuleInfo",
[
("name", str),
("root_paths", Set[str]),
("module", ModuleType),
("metaflow_module", bool),
],
)
class PackagedDistribution(metadata.Distribution):
    """
    A Python package packaged within a MetaflowCodeContent.

    It lets users query importlib metadata as they normally would: the packaged
    package is presented as a distribution even though it is not really
    installed (it is simply reachable through the PythonPath).
    """

    def __init__(self, root: str, content: Dict[str, str]):
        # `content` maps a metadata file name to its text
        self._content = content
        self._root = Path(root)

    # Strongly inspired from PathDistribution in metadata.py
    def read_text(self, filename: Union[str, os.PathLike]) -> Optional[str]:
        key = str(filename)
        return self._content[key] if key in self._content else None

    read_text.__doc__ = metadata.Distribution.read_text.__doc__

    # Returns a metadata.SimplePath but not always present in importlib.metadata libs so
    # skipping return type.
    def locate_file(self, path: Union[str, os.PathLike]):
        return self._root / path
class PackagedDistributionFinder(metadata.DistributionFinder):
    """
    Distribution finder backed by an in-memory mapping of distribution name to
    its stored metadata files (`dist_info`).
    """

    def __init__(self, dist_info: Dict[str, Dict[str, str]]):
        self._dist_info = dist_info

    def find_distributions(self, context=metadata.DistributionFinder.Context()):
        """
        Yield `PackagedDistribution` objects matching `context`.

        With no name in the context every known distribution is yielded;
        otherwise the requested name is normalized (runs of `-`, `_`, `.`
        collapsed to `-`, lowercased) and looked up in the stored mapping.
        """
        requested = context.name
        if requested is not None:
            normalized = name_normalizer.sub("-", cast(str, requested)).lower()
            if normalized in self._dist_info:
                yield PackagedDistribution(
                    os.path.join(get_metaflow_root(), cast(str, requested)),
                    self._dist_info[normalized],
                )
            return

        # Yields all known distributions
        for dist_name, dist_content in self._dist_info.items():
            yield PackagedDistribution(
                os.path.join(get_metaflow_root(), dist_name), dist_content
            )
================================================
FILE: metaflow/packaging_sys/tar_backend.py
================================================
import tarfile
from io import BytesIO
from typing import Any, IO, List, Optional, Union
from .backend import PackagingBackend
class TarPackagingBackend(PackagingBackend):
    """
    Packaging backend that builds and reads gzip-compressed tar archives.

    The archive is built in memory. Every member gets the same fixed
    modification time, and `get_blob` clears four header bytes of the produced
    stream, so the resulting bytes do not carry creation timestamps.
    """

    type = "tgz"

    @classmethod
    def get_extract_commands(cls, archive_name: str, dest_dir: str) -> List[str]:
        """Return the shell command(s) extracting `archive_name` into `dest_dir`."""
        return [
            f"TAR_OPTIONS='--warning=no-timestamp' tar -xzf {archive_name} -C {dest_dir}"
        ]

    def __init__(self):
        super().__init__()
        # In-memory buffer receiving the archive bytes; set by create()
        self._buf = None

    def create(self):
        """Start a new in-memory archive and return this backend."""
        self._buf = BytesIO()
        self._archive = tarfile.open(
            fileobj=self._buf, mode="w:gz", compresslevel=3, dereference=True
        )
        return self

    def add_file(self, filename: str, arcname: Optional[str] = None):
        """Add the file at `filename` to the archive, optionally as `arcname`."""
        info = self._archive.gettarinfo(filename, arcname)
        # Setting this default to Dec 3, 2019
        info.mtime = 1575360000
        with open(filename, mode="rb") as f:
            self._archive.addfile(info, f)

    def add_data(self, data: BytesIO, arcname: str):
        """Add the full content of the in-memory buffer `data` as `arcname`."""
        info = tarfile.TarInfo(arcname)
        data.seek(0)
        info.size = len(data.getvalue())
        # Setting this default to Dec 3, 2019
        info.mtime = 1575360000
        self._archive.addfile(info, data)

    def close(self):
        """Close the archive if one was created."""
        if self._archive:
            self._archive.close()

    def get_blob(self) -> Optional[Union[bytes, bytearray]]:
        """
        Return the archive bytes, or None if no archive was created.

        Bytes 4 to 7 of the stream are zeroed to neutralize the timestamp
        stored there.
        """
        if self._buf:
            blob = bytearray(self._buf.getvalue())
            blob[4:8] = [0] * 4  # Reset 4 bytes from offset 4 to account for ts
            return blob
        return None

    @classmethod
    def cls_open(cls, content: IO[bytes]) -> tarfile.TarFile:
        """Open a gzip-compressed tar archive for reading from `content`."""
        return tarfile.open(fileobj=content, mode="r:gz")

    @classmethod
    def cls_member_name(cls, member: Union[tarfile.TarInfo, str]) -> str:
        """
        Returns the name of the member as a string.
        """
        return member.name if isinstance(member, tarfile.TarInfo) else member

    @classmethod
    def cls_has_member(cls, archive: tarfile.TarFile, name: str) -> bool:
        """Return True if `archive` has a member called `name`."""
        try:
            archive.getmember(name)
            return True
        except KeyError:
            return False

    @classmethod
    def cls_get_member(cls, archive: tarfile.TarFile, name: str) -> Optional[bytes]:
        """
        Return the content of member `name`, or None when it cannot be read.

        None is returned when the member does not exist and also when it
        exists but carries no data (for example a directory).
        """
        try:
            member = archive.getmember(name)
            extracted = archive.extractfile(member)
        except KeyError:
            return None
        # extractfile() returns None for members that are not regular files or
        # links; calling .read() on that would raise AttributeError.
        if extracted is None:
            return None
        return extracted.read()

    @classmethod
    def cls_extract_members(
        cls,
        archive: tarfile.TarFile,
        members: Optional[List[Any]] = None,
        dest_dir: str = ".",
    ) -> None:
        """Extract `members` (all members when None) of `archive` into `dest_dir`."""
        # NOTE(review): extractall() is called without an extraction filter, so
        # member paths are trusted -- only use this on archives we produced.
        archive.extractall(path=dest_dir, members=members)

    @classmethod
    def cls_list_members(
        cls, archive: tarfile.TarFile
    ) -> Optional[List[tarfile.TarInfo]]:
        """Return the archive's members, or None if it has none."""
        return archive.getmembers() or None

    @classmethod
    def cls_list_names(cls, archive: tarfile.TarFile) -> Optional[List[str]]:
        """Return the archive's member names, or None if it has none."""
        return archive.getnames() or None
================================================
FILE: metaflow/packaging_sys/utils.py
================================================
import os
from contextlib import contextmanager
from typing import Callable, Generator, List, Optional, Tuple
from ..util import to_unicode, walk_without_cycles
def walk(
    root: str,
    exclude_hidden: bool = True,
    file_filter: Optional[Callable[[str], bool]] = None,
    exclude_tl_dirs: Optional[List[str]] = None,
) -> Generator[Tuple[str, str], None, None]:
    """
    Walk `root` and yield `(path, name)` pairs for the files found.

    `path` is the file path as produced by the walk and `name` is that path
    with the leading `dirname(root) + "/"` characters removed.

    Parameters
    ----------
    root : str
        Directory to walk.
    exclude_hidden : bool, default True
        Skip directories that have a component starting with "." below `root`.
    file_filter : Optional[Callable[[str], bool]], default None
        Called with the bare file name; files for which it returns a falsy
        value are skipped. None keeps every file.
    exclude_tl_dirs : Optional[List[str]], default None
        Passed through to `walk_without_cycles`.
    """
    root = to_unicode(root)  # handle files/folder with non ascii chars
    prefixlen = len("%s/" % os.path.dirname(root))
    root_len = len(root.rstrip(os.sep))
    for dirpath, _, filenames in walk_without_cycles(root, exclude_tl_dirs):
        # Only check path components *under* root for hidden directories;
        # ancestor directories (above root) are not relevant.
        if exclude_hidden and "/." in dirpath[root_len:]:
            continue
        for fname in filenames:
            if file_filter is not None and not file_filter(fname):
                continue
            full_path = os.path.join(dirpath, fname)
            yield full_path, full_path[prefixlen:]
def suffix_filter(suffixes: Optional[List[str]]) -> Callable[[str], bool]:
    """
    Returns a filter function that checks if a file ends with any of the given suffixes.

    Matching is case-insensitive. A file name starting with "." is accepted
    only when the whole name is one of the suffixes (e.g. ".gitignore"); any
    other name is accepted when it ends with one of the suffixes.

    Parameters
    ----------
    suffixes : Optional[List[str]]
        Suffixes to accept. None accepts every file name.

    Returns
    -------
    Callable[[str], bool]
        Predicate taking a bare file name.
    """
    if suffixes is None:
        # No restriction requested: accept everything.
        return lambda fname: True

    lowered = tuple(s.lower() for s in suffixes)

    def _filter(fname: str) -> bool:
        fname = fname.lower()
        # startswith() is safe on an empty name, unlike indexing fname[0]
        if fname.startswith("."):
            return fname in lowered
        return fname.endswith(lowered)

    return _filter
@contextmanager
def with_dir(new_dir):
    """
    Context manager that runs its body with `new_dir` as the working directory.

    The previous working directory is restored on exit, including when the
    body raises.

    Parameters
    ----------
    new_dir : str
        Directory to change into; also the value bound by `with ... as`.
    """
    current_dir = os.getcwd()
    os.chdir(new_dir)
    try:
        yield new_dir
    finally:
        # Always go back, otherwise an exception leaves the process in new_dir
        os.chdir(current_dir)
================================================
FILE: metaflow/packaging_sys/v1.py
================================================
import json
import os
import sys
from pathlib import Path
from types import ModuleType
from typing import Any, Callable, Dict, Generator, List, Optional, Set, Tuple, Union
from ..debug import debug
from ..extension_support import (
EXT_EXCLUDE_SUFFIXES,
extension_info,
package_mfext_all,
package_mfext_all_descriptions,
)
from ..exception import MetaflowException
from ..metaflow_version import get_version
from ..user_decorators.user_flow_decorator import FlowMutatorMeta
from ..user_decorators.user_step_decorator import UserStepDecoratorMeta
from ..util import get_metaflow_root, walk_without_cycles
from . import ContentType, MFCONTENT_MARKER, MetaflowCodeContentV1Base
from .distribution_support import _ModuleInfo, modules_to_distributions
from .utils import suffix_filter, walk
class MetaflowCodeContentV1(MetaflowCodeContentV1Base):
# File suffixes that are kept when walking the Metaflow distribution itself
# (see _metaflow_distribution_files, which passes this list to suffix_filter).
METAFLOW_SUFFIXES_LIST = [".py", ".html", ".css", ".js"]
def __init__(
    self,
    code_dir: str = MetaflowCodeContentV1Base._code_dir,
    other_dir: str = MetaflowCodeContentV1Base._other_dir,
    criteria: Callable[[ModuleType], bool] = lambda x: True,
):
    """
    Collect everything that has to be packaged.

    Parameters
    ----------
    code_dir : str
        Directory (in the archive) under which code files are placed.
    other_dir : str
        Directory (in the archive) under which non-code files are placed.
    criteria : Callable[[ModuleType], bool]
        Predicate applied to every entry of `sys.modules`; the top-most
        modules that satisfy it are packaged.
    """
    super().__init__(code_dir, other_dir)

    self._metaflow_root = get_metaflow_root()
    self._metaflow_version = get_version()
    self._criteria = criteria

    # We try to find the modules we need to package. We will first look at all modules
    # and apply the criteria to them. Then we will use the most parent module that
    # fits the criteria as the module to package

    # Make a copy since sys.modules could be modified while we load other
    # modules. See https://github.com/Netflix/metaflow/issues/2489
    all_modules = dict(sys.modules)
    modules = filter(lambda x: criteria(x[1]), all_modules.items())
    # Ensure that we see the parent modules first
    modules = sorted(modules, key=lambda x: x[0])
    if modules:
        last_prefix = modules[0][0]
        new_modules = [modules[0]]
        for name, mod in modules[1:]:
            if name.startswith(last_prefix + "."):
                # This is a submodule of the last module, we can skip it
                continue
            # Otherwise, we have a new top-level module
            last_prefix = name
            new_modules.append((name, mod))
    else:
        new_modules = []

    self._modules = {}  # type: Dict[str, _ModuleInfo]

    # We do this explicitly module by module to harden it against misbehaving
    # modules like the one in:
    # https://github.com/Netflix/metaflow/issues/2512
    # We will silently ignore modules that are not well built.
    for name, mod in new_modules:
        try:
            minfo = _ModuleInfo(
                name,
                set(
                    Path(p).resolve().as_posix()
                    for p in getattr(mod, "__path__", [mod.__file__])
                ),
                mod,
                True,  # This is a Metaflow module (see filter below)
            )
        except Exception:
            # Deliberately best-effort, but do not swallow KeyboardInterrupt
            # or SystemExit the way a bare `except:` would.
            continue
        self._modules[name] = minfo

    # Contain metadata information regarding the distributions packaged.
    # This allows Metaflow to "fake" distribution information when packaged
    self._distmetainfo = {}  # type: Dict[str, Dict[str, str]]

    # Maps an absolute path on the filesystem to the path of the file in the
    # archive.
    self._files = {}  # type: Dict[str, str]
    self._files_from_modules = {}  # type: Dict[str, str]

    self._other_files = {}  # type: Dict[str, str]
    self._other_content = {}  # type: Dict[str, bytes]

    debug.package_exec(f"Used system modules found: {str(self._modules)}")

    # Populate with files from the third party modules
    for k, v in self._modules.items():
        self._files_from_modules.update(self._module_files(k, v.root_paths))

    # Figure out the files to package for Metaflow and extensions
    self._cached_metaflow_files = list(self._metaflow_distribution_files())
    self._cached_metaflow_files.extend(list(self._metaflow_extension_files()))
def create_mfcontent_info(self) -> Dict[str, Any]:
    """
    Build the marker payload for this content.

    Returns
    -------
    Dict[str, Any]
        The format version (1) and the archive names of all module files.
    """
    archive_names = [arcname for arcname in self._files_from_modules.values()]
    return {"version": 1, "module_files": archive_names}
def get_excluded_tl_entries(self) -> List[str]:
    """
    Top-level entries that must not be re-packaged.

    When packaging Metaflow from within an executing Metaflow flow, the files
    inserted by this content (possibly) have to be left out. This returns the
    files or top-level directories to exclude.

    Returns
    -------
    List[str]
        Files or directories to exclude
    """
    excluded = [self._code_dir]
    excluded.append(self._other_dir)
    return excluded
def content_names(
    self, content_types: Optional[int] = None
) -> Generator[Tuple[str, str], None, None]:
    """
    Detailed listing of what this MetaflowCodeContent holds: every file, plus
    the non-file entries (for example the INFO or CONFIG data) present in the
    archive.

    Parameters
    ----------
    content_types : Optional[int]
        The type of content to get the names of. If None, all content is returned.

    Yields
    ------
    Generator[Tuple[str, str], None, None]
        Path on the filesystem and the name in the archive
    """
    for entry in self._content(content_types, generate_value=False):
        yield entry
def contents(
    self, content_types: Optional[int] = None
) -> Generator[Tuple[Union[bytes, str], str], None, None]:
    """
    Same listing as content_names, except that non-file entries are returned
    with their actual content as bytes. For files, the output is identical to
    content_names.

    Parameters
    ----------
    content_types : Optional[int]
        The type of content to get the content of. If None, all content is returned.

    Yields
    ------
    Generator[Tuple[Union[str, bytes], str], None, None]
        Content of the MF content
    """
    for entry in self._content(content_types, generate_value=True):
        yield entry
def show(self) -> str:
    """
    Summarize this MetaflowCodeContent for a human reader. Individual files
    are not listed; the output describes what is included at a higher level.

    Returns
    -------
    str
        A human-readable string representation of the content of this MetaflowCodeContent
    """

    def _names_by_module(decorators):
        # Group decorator names by the module that defines them
        grouped = {}
        for deco_name, deco in decorators.items():
            owner = getattr(deco, "_original_module", deco.__module__)
            grouped.setdefault(owner, []).append(deco_name)
        return grouped

    def _provided_by(grouped, module_name):
        # Names defined in `module_name` itself or in one of its submodules
        return [
            ", ".join(names)
            for owner, names in grouped.items()
            if owner == module_name or owner.startswith(module_name + ".")
        ]

    step_decos_by_module = _names_by_module(UserStepDecoratorMeta.all_decorators())
    flow_decos_by_module = _names_by_module(FlowMutatorMeta.all_decorators())

    lines = []
    if self._metaflow_version:
        lines.append(f"\nMetaflow version: {self._metaflow_version}")

    installed_extensions = extension_info()["installed"]
    if installed_extensions:
        lines.append("\nMetaflow extensions packaged:")
        for ext_name, ext_details in installed_extensions.items():
            lines.append(
                f" - {ext_name} ({ext_details['extension_name']}) @ {ext_details['dist_version']}"
            )

    if self._modules:
        mf_lines = []
        other_lines = []
        for name, info in self._modules.items():
            entry = f" - {name} @ {', '.join(info.root_paths)}"
            if not info.metaflow_module:
                other_lines.append(entry)
                continue
            mf_lines.append(entry)
            step_names = _provided_by(step_decos_by_module, info.name)
            flow_names = _provided_by(flow_decos_by_module, info.name)
            if step_names:
                mf_lines.append(
                    f" - Provides step decorators: {', '.join(step_names)}"
                )
            if flow_names:
                mf_lines.append(
                    f" - Provides flow mutators: {', '.join(flow_names)}"
                )
        if mf_lines:
            lines.append("\nMetaflow modules:")
            lines.extend(mf_lines)
        if other_lines:
            lines.append("\nNon-Metaflow packaged modules:")
            lines.extend(other_lines)

    return "\n".join(lines)
def add_info(self, info: Dict[str, Any]) -> None:
    """
    Register the INFO file content with the Metaflow content.

    Parameters
    ----------
    info: Dict[str, Any]
        The content of the INFO file

    Raises
    ------
    MetaflowException
        If INFO content was already added.
    """
    target = os.path.join(self._other_dir, self._info_file)
    if target not in self._other_content:
        # Stored as UTF-8 encoded JSON
        self._other_content[target] = json.dumps(info).encode("utf-8")
        return
    raise MetaflowException("INFO file already present in the MF environment")
def add_config(self, config: Dict[str, Any]) -> None:
    """
    Register the CONFIG file content with the Metaflow content.

    Parameters
    ----------
    config: Dict[str, Any]
        The content of the CONFIG file

    Raises
    ------
    MetaflowException
        If CONFIG content was already added.
    """
    target = os.path.join(self._other_dir, self._config_file)
    if target not in self._other_content:
        # Stored as UTF-8 encoded JSON
        self._other_content[target] = json.dumps(config).encode("utf-8")
        return
    raise MetaflowException("CONFIG file already present in the MF environment")
def add_module(self, module: ModuleType) -> None:
    """
    Add a python module to the Metaflow content

    Parameters
    ----------
    module: ModuleType
        The module to add
    """
    name = module.__name__
    debug.package_exec(f"Adding module {name} to the MF content")
    # If the module is a single file, we handle this here by looking at __file__
    # which will point to the single file. If it is an actual module, __path__
    # will contain the path(s) to the module
    if hasattr(module, "__file__") and module.__file__:
        root_paths = [Path(module.__file__).resolve().as_posix()]
    else:
        root_paths = []
        seen_path_values = set()
        new_paths = module.__spec__.submodule_search_locations
        while new_paths:
            paths = new_paths
            new_paths = []
            for p in paths:
                if p in seen_path_values:
                    continue
                if os.path.isdir(p):
                    root_paths.append(Path(p).resolve().as_posix())
                elif p in sys.path_importer_cache:
                    # We have a path hook that we likely need to call to get the actual path
                    cached_finder = sys.path_importer_cache[p]
                    # The cache stores None for paths no path hook could
                    # handle; there is nothing to ask in that case.
                    addl_spec = (
                        cached_finder.find_spec(name)
                        if cached_finder is not None
                        else None
                    )
                    if (
                        addl_spec is not None
                        and addl_spec.submodule_search_locations
                    ):
                        new_paths.extend(addl_spec.submodule_search_locations)
                else:
                    # This may not be as required since it is likely the importer cache has
                    # everything already but just in case, we will also go through the
                    # path hooks and see if we find another one
                    for path_hook in sys.path_hooks:
                        try:
                            finder = path_hook(p)
                            addl_spec = finder.find_spec(name)
                            if (
                                addl_spec is not None
                                and addl_spec.submodule_search_locations
                            ):
                                new_paths.extend(
                                    addl_spec.submodule_search_locations
                                )
                                break
                        except ImportError:
                            continue
                seen_path_values.add(p)
    self._modules[name] = _ModuleInfo(
        name,
        set(root_paths),
        module,
        False,  # This is not a Metaflow module (added by the user manually)
    )
    self._files_from_modules.update(
        self._module_files(name, self._modules[name].root_paths)
    )
def add_code_file(self, file_path: str, file_name: str) -> None:
    """
    Register a code file with the Metaflow content.

    Parameters
    ----------
    file_path: str
        The path to the code file to add (on the filesystem)
    file_name: str
        The path in the archive to add the code file to

    Raises
    ------
    MetaflowException
        If the same file was already registered under another archive name.
    """
    file_path = os.path.realpath(file_path)
    debug.package_exec(
        f"Adding code file {file_path} as {file_name} to the MF content"
    )
    # Archive names always live under the code directory
    archive_name = os.path.join(self._code_dir, file_name.lstrip("/"))
    if file_path in self._files:
        previous_name = self._files[file_path]
        if previous_name != archive_name:
            raise MetaflowException(
                "File '%s' is already present in the MF content with a different name: '%s'"
                % (file_path, previous_name)
            )
    self._files[file_path] = archive_name
def add_other_file(self, file_path: str, file_name: str) -> None:
    """
    Register a non-python file with the Metaflow content.

    Parameters
    ----------
    file_path: str
        The path to the file to add (on the filesystem)
    file_name: str
        The path in the archive to add the file to

    Raises
    ------
    MetaflowException
        If the same file was already registered under another archive name.
    """
    file_path = os.path.realpath(file_path)
    debug.package_exec(
        f"Adding other file {file_path} as {file_name} to the MF content"
    )
    # Archive names always live under the "other" directory
    archive_name = os.path.join(self._other_dir, file_name.lstrip("/"))
    if file_path in self._other_files:
        previous_name = self._other_files[file_path]
        if previous_name != archive_name:
            raise MetaflowException(
                "File %s is already present in the MF content with a different name: %s"
                % (file_path, previous_name)
            )
    self._other_files[file_path] = archive_name
def _content(
    self, content_types: Optional[int] = None, generate_value: bool = False
) -> Generator[Tuple[Union[str, bytes], str], None, None]:
    """
    Yield `(source, archive_name)` pairs for the requested content types.

    Parameters
    ----------
    content_types : Optional[int]
        Bitmask of `ContentType` values; None selects all content.
    generate_value : bool, default False
        For entries that are not files on disk (INFO/CONFIG data, the
        distribution information and the content marker): if True, yield
        their content as bytes; if False, yield a descriptive placeholder
        string instead.

    Yields
    ------
    Tuple[Union[str, bytes], str]
        For files, the path on the filesystem and the name in the archive.
        For generated entries, the bytes (or placeholder) and the name in the
        archive.
    """
    from ..package import MetaflowPackage  # Prevent circular dependency

    if content_types is None:
        content_types = ContentType.ALL_CONTENT.value

    if content_types & ContentType.CODE_CONTENT.value:
        yield from self._cached_metaflow_files
        yield from self._files.items()
    if content_types & ContentType.MODULE_CONTENT.value:
        yield from self._files_from_modules.items()
    if content_types & ContentType.OTHER_CONTENT.value:
        yield from self._other_files.items()
        dist_info_name = os.path.join(self._other_dir, self._dist_info_file)
        if generate_value:
            for k, v in self._other_content.items():
                yield v, k
            # Include the distribution file too
            yield json.dumps(self._distmetainfo).encode("utf-8"), dist_info_name
            yield json.dumps(self.create_mfcontent_info()).encode(
                "utf-8"
            ), MFCONTENT_MARKER
        else:
            # The format string must contain a %s placeholder: formatting an
            # empty string with an argument raises TypeError.
            placeholder = "<generated %s content>"
            for k in self._other_content.keys():
                yield placeholder % (os.path.basename(k)), k
            yield placeholder % (
                os.path.basename(self._dist_info_file)
            ), dist_info_name
            yield placeholder % MFCONTENT_MARKER, MFCONTENT_MARKER
def _metaflow_distribution_files(self) -> Generator[Tuple[str, str], None, None]:
    """
    Yield `(filesystem path, archive name)` for the files of the Metaflow
    distribution itself, limited to the suffixes in METAFLOW_SUFFIXES_LIST.
    """
    debug.package_exec("Including Metaflow from '%s'" % self._metaflow_root)
    metaflow_dir = os.path.join(self._metaflow_root, "metaflow")
    keep_file = suffix_filter(self.METAFLOW_SUFFIXES_LIST)
    for found in walk(metaflow_dir, exclude_hidden=False, file_filter=keep_file):
        # Archive names are placed under the code directory
        yield found[0], os.path.join(self._code_dir, found[1])
def _metaflow_extension_files(self) -> Generator[Tuple[str, str], None, None]:
    """
    Yield `(filesystem path, archive name)` for the files of all Metaflow
    extensions, with archive names placed under the code directory.
    """
    # Metaflow extensions; for now, we package *all* extensions but this may change
    # at a later date; it is possible to call `package_mfext_package` instead of
    # `package_mfext_all` but in that case, make sure to also add a
    # metaflow_extensions/__init__.py file to properly "close" the metaflow_extensions
    # package and prevent other extensions from being loaded that may be
    # present in the rest of the system
    for entry in package_mfext_all():
        yield entry[0], os.path.join(self._code_dir, entry[1])

    if not debug.package:
        return
    # Debug output only: keep just the root paths of each extension
    root_paths_only = {}
    for ext_name, description in package_mfext_all_descriptions().items():
        root_paths_only[ext_name] = {
            field: value
            for field, value in description.items()
            if field == "root_paths"
        }
    debug.package_exec(f"Metaflow extensions packaged: {root_paths_only}")
def _module_files(
    self, name: str, paths: Set[str]
) -> Generator[Tuple[str, str], None, None]:
    """
    Yield `(filesystem path, archive name)` for every file of module `name`.

    Files provided by distributions that contribute to the module are yielded
    first (their METADATA and RECORD are also recorded in `_distmetainfo`),
    then files found under the remaining root paths. If no `__init__.py` was
    seen, an empty one is added to close the package.

    Parameters
    ----------
    name : str
        Name of the module; a dotted name maps to nested directories in the
        archive.
    paths : Set[str]
        Root paths of the module. The caller's set is not modified.

    Raises
    ------
    RuntimeError
        If a distribution provides files for the module from a location that
        is not one of `paths`.
    """
    debug.package_exec(
        " Looking for distributions for module %s in %s" % (name, paths)
    )
    paths = set(paths)  # Do not modify external paths
    has_init = False
    distributions = modules_to_distributions().get(name)
    prefix_parts = tuple(name.split("."))

    seen_distributions = set()
    if distributions:
        for dist in distributions:
            dist_name = dist.metadata["Name"]  # dist.name not always present
            if dist_name in seen_distributions:
                continue
            # For some reason, sometimes the same distribution appears twice. We
            # don't need to process twice.
            seen_distributions.add(dist_name)
            debug.package_exec(
                " Including distribution '%s' for module '%s'"
                % (dist_name, name)
            )
            dist_root = str(dist.locate_file(name))
            has_file_in_root = False
            if dist_name not in self._distmetainfo:
                # Possible that a distribution contributes to multiple modules
                self._distmetainfo[dist_name] = {
                    # We can add more if needed but these are likely the most
                    # useful (captures, name, version, etc and files which can
                    # be used to find non-python files in the distribution).
                    "METADATA": dist.read_text("METADATA") or "",
                    "RECORD": dist.read_text("RECORD") or "",
                }
            for file in dist.files or []:
                # Skip files that do not belong to this module (distribution may
                # provide multiple modules)
                if (
                    file.parts[: len(prefix_parts)] != prefix_parts
                    or file.suffix == ".pth"
                    or str(file).startswith("__editable__")
                ):
                    continue
                if file.parts[len(prefix_parts)] == "__init__.py":
                    has_init = True
                has_file_in_root = True
                # At this point, we know that we are seeing actual files in the
                # dist_root so we make sure it is as expected
                if dist_root not in paths:
                    # This is an error because it means that this distribution is
                    # not contributing to the module.
                    raise RuntimeError(
                        "Distribution '%s' is not contributing to module '%s' as "
                        "expected (got '%s' when expected one of %s)"
                        % (dist.metadata["Name"], name, dist_root, paths)
                    )
                # Strip exactly the module prefix (not just the first component)
                # so a dotted module name does not duplicate path components.
                yield str(
                    dist.locate_file(file).resolve().as_posix()
                ), os.path.join(
                    self._code_dir, *prefix_parts, *file.parts[len(prefix_parts) :]
                )
            if has_file_in_root:
                paths.discard(dist_root)

    # Now if there are more paths left in paths, it means there is a non-distribution
    # component to this package which we also include.
    debug.package_exec(
        " Looking for non-distribution files for module '%s' in %s"
        % (name, paths)
    )
    for path in paths:
        if not Path(path).is_dir():
            # Single file for the module -- this will be something like <name>.py
            yield path, os.path.join(
                self._code_dir, *prefix_parts[:-1], f"{prefix_parts[-1]}.py"
            )
            has_init = True
        else:
            for root, _, files in walk_without_cycles(path):
                for file in files:
                    if any(file.endswith(x) for x in EXT_EXCLUDE_SUFFIXES):
                        continue
                    rel_path = os.path.relpath(os.path.join(root, file), path)
                    if rel_path == "__init__.py":
                        has_init = True
                    # Use the split module name so that a dotted name lands in
                    # nested directories, like the other branches of this method.
                    yield os.path.join(root, file), os.path.join(
                        self._code_dir,
                        *prefix_parts,
                        rel_path,
                    )
    # We now include an empty __init__.py file to close the module and prevent
    # leaks from possible namespace packages
    if not has_init:
        yield os.path.join(
            self._metaflow_root, "metaflow", "extension_support", "_empty_file.py"
        ), os.path.join(self._code_dir, *prefix_parts, "__init__.py")
================================================
FILE: metaflow/parameters.py
================================================
import json
from contextlib import contextmanager
from threading import local
from typing import Any, Callable, Dict, NamedTuple, Optional, TYPE_CHECKING, Type, Union
from metaflow._vendor import click
from .util import get_username, is_stringish
from .exception import (
ParameterFieldFailed,
ParameterFieldTypeMismatch,
MetaflowException,
)
if TYPE_CHECKING:
from .user_configs.config_parameters import ConfigValue
# `strtype` is the base string type used for isinstance checks in this module
# (see JSONTypeClass.convert); it is picked once at import time.
try:
# Python2
strtype = basestring
except NameError:
# Python3
strtype = str
# ParameterContext allows deploy-time functions modify their
# behavior based on the context. We can add fields here without
# breaking backwards compatibility but don't remove any fields!
class ParameterContext(NamedTuple):
    # Context handed to deploy-time functions. Fields may be appended without
    # breaking backwards compatibility; never remove or reorder them.

    # Name of the flow
    flow_name: str
    # Name of the user
    user_name: str
    # Name of the parameter being evaluated
    parameter_name: str
    # Callable used to emit messages
    logger: Callable[..., None]
    # Type of the datastore in use
    ds_type: str
    # Configuration values, if any
    configs: Optional["ConfigValue"]
# When we launch a flow, we need to know the parameters so we can
# attach them with add_custom_parameters to commands. This used to be a global
# but causes problems when multiple FlowSpec are loaded (as can happen when using
# the Runner or just if multiple Flows are defined and instantiated). To minimally
# impact code, we now create the CLI with a thread local value of the FlowSpec
# that is being used to create the CLI which enables us to extract the parameters
# directly from the Flow.
current_flow = local()
@contextmanager
def flow_context(flow_cls):
    """
    Make `flow_cls` the current flow of this thread for the duration of the
    `with` block. The CLI creation code reads it to extract the parameters of
    the FlowSpec it is being built for.
    """
    # A stack is needed because, with the runner, this can be entered several
    # times in a nested fashion. The newest flow sits at index 0.
    stack = getattr(current_flow, "flow_cls_stack", [])
    stack.insert(0, flow_cls)
    current_flow.flow_cls_stack = stack
    current_flow.flow_cls = stack[0]
    try:
        yield
    finally:
        remaining = current_flow.flow_cls_stack[1:]
        current_flow.flow_cls_stack = remaining
        if remaining:
            current_flow.flow_cls = remaining[0]
        else:
            # Outermost context left: remove the thread-local attributes
            del current_flow.flow_cls_stack
            del current_flow.flow_cls
context_proto = None
def replace_flow_context(flow_cls):
    """
    Swap the current flow class (top of this thread's stack) for `flow_cls`.
    Used when the flow class changes after user configuration functions ran.
    """
    # Drop the current top entry, then put the new class in its place
    updated_stack = current_flow.flow_cls_stack[1:]
    updated_stack.insert(0, flow_cls)
    current_flow.flow_cls_stack = updated_stack
    current_flow.flow_cls = updated_stack[0]
class JSONTypeClass(click.ParamType):
    """
    Click parameter type for values given as JSON text.

    String values are parsed with `json.loads`; values of any other type are
    returned unchanged.
    """

    name = "JSON"

    def convert(self, value, param, ctx):
        """Parse `value` as JSON if it is a string, otherwise return it as is."""
        if not isinstance(value, strtype):
            # Already a correct type
            return value
        try:
            return json.loads(value)
        except Exception:
            # Report any parsing problem as a parameter failure, but let
            # KeyboardInterrupt/SystemExit propagate (a bare `except:` would
            # have turned those into a validation error too).
            self.fail("%s is not a valid JSON object" % value, param, ctx)

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return "JSON"
class DeployTimeField(object):
    """
    This is a wrapper object for a user-defined function that is called
    at deploy time to populate fields in a Parameter. The wrapper
    is needed to make Click show the actual value returned by the
    function instead of a function pointer in its help text. Also, this
    object curries the context argument for the function, and pretty
    prints any exceptions that occur during evaluation.

    Parameters
    ----------
    parameter_name : str
        Name of the parameter the field belongs to.
    parameter_type
        Expected type of the value: a single type, or a list of accepted types.
    field : str
        Name of the field being populated (used in error messages).
    fun : Callable
        User function, called as `fun(ctx, deploy_time)` or `fun(ctx)`.
    return_str : bool, default True
        If True, type-checked values are returned converted with `str()`.
    print_representation : Optional[str], default None
        Text to show for this field; defaults to `str(fun)` for `description`.
    """

    def __init__(
        self,
        parameter_name,
        parameter_type,
        field,
        fun,
        return_str=True,
        print_representation=None,
    ):
        self.fun = fun
        self.field = field
        self.parameter_name = parameter_name
        self.parameter_type = parameter_type
        self.return_str = return_str
        self.print_representation = self.user_print_representation = (
            print_representation
        )
        if self.print_representation is None:
            self.print_representation = str(self.fun)

    def __call__(self, deploy_time=False):
        # This is called in two ways:
        #  - through the normal Click default parameter evaluation: if a default
        #    value is a callable, Click will call it without any argument. In other
        #    words, deploy_time=False. This happens for a normal "run" or the "trigger"
        #    functions for step-functions for example. Anything that has the
        #    @add_custom_parameters decorator will trigger this. Once click calls this,
        #    it will then pass the resulting value to the convert() functions for the
        #    type for that Parameter.
        #  - by deploy_time_eval which is invoked to process the parameters at
        #    deploy_time and outside of click processing (ie: at that point, Click
        #    is not involved since anytime deploy_time_eval is called, no custom parameters
        #    have been added). In that situation, deploy_time will be True. Note that in
        #    this scenario, the value should be something that can be converted to JSON.
        # The deploy_time value can therefore be used to determine which type of
        # processing is requested.
        ctx = context_proto._replace(parameter_name=self.parameter_name)
        try:
            try:
                # Most user-level functions may not care about the deploy_time parameter
                # but IncludeFile does.
                val = self.fun(ctx, deploy_time)
            except TypeError:
                val = self.fun(ctx)
        except:
            raise ParameterFieldFailed(self.parameter_name, self.field)
        else:
            return self._check_type(val, deploy_time)

    def _check_type(self, val, deploy_time):
        # it is easy to introduce a deploy-time function that accidentally
        # returns a value whose type is not compatible with what is defined
        # in Parameter. Let's catch those mistakes early here, instead of
        # showing a cryptic stack trace later.

        # note: this doesn't work with long in Python2 or types defined as
        # click types, e.g. click.INT
        TYPES = {bool: "bool", int: "int", float: "float", list: "list", dict: "dict"}

        msg = (
            "The value returned by the deploy-time function for "
            "the parameter *%s* field *%s* has a wrong type. "
            % (self.parameter_name, self.field)
        )

        if isinstance(self.parameter_type, list):
            if not any(isinstance(val, x) for x in self.parameter_type):
                # A list is not hashable and cannot be looked up in TYPES:
                # name every accepted type individually instead.
                expected = ", ".join(
                    TYPES.get(x, getattr(x, "__name__", str(x)))
                    for x in self.parameter_type
                )
                msg += "Expected one of the following %s." % expected
                raise ParameterFieldTypeMismatch(msg)
            return str(val) if self.return_str else val
        elif self.parameter_type in TYPES:
            if type(val) != self.parameter_type:
                msg += "Expected a %s." % TYPES[self.parameter_type]
                raise ParameterFieldTypeMismatch(msg)
            return str(val) if self.return_str else val
        else:
            if deploy_time:
                try:
                    if not is_stringish(val):
                        val = json.dumps(val)
                except TypeError:
                    msg += "Expected a JSON-encodable object or a string."
                    raise ParameterFieldTypeMismatch(msg)
                return val
            # If not deploy_time, we expect a string
            if not is_stringish(val):
                msg += "Expected a string."
                raise ParameterFieldTypeMismatch(msg)
            return val

    @property
    def description(self):
        return self.print_representation

    def __str__(self):
        if self.user_print_representation:
            return self.user_print_representation
        # __str__ must return a string; the evaluated value may not be one
        # when return_str is False.
        return str(self())

    def __repr__(self):
        if self.user_print_representation:
            return self.user_print_representation
        # Same constraint as __str__: always hand back a string.
        return str(self())
def deploy_time_eval(value):
    """
    Evaluate `value` for deploy time.

    A DeployTimeField is called with `deploy_time=True`, a
    DelayedEvaluationParameter is called with `return_str=True`, and anything
    else is returned unchanged.
    """
    if isinstance(value, DeployTimeField):
        return value(deploy_time=True)
    if isinstance(value, DelayedEvaluationParameter):
        return value(return_str=True)
    return value
# this is called by cli.main
def set_parameter_context(flow_name, echo, datastore, configs):
    """
    Build the module-level `context_proto` from which the context of every
    deploy-time function call is derived.

    Parameters
    ----------
    flow_name : str
        Name of the flow.
    echo : Callable
        Stored as the context's logger.
    datastore
        Object whose `TYPE` attribute is stored as the datastore type.
    configs
        Converted with `dict()` and wrapped in a ConfigValue.
    """
    global context_proto
    from .user_configs.config_parameters import (
        ConfigValue,
    )  # Prevent circular dependency

    user_name = get_username()
    datastore_type = datastore.TYPE
    config_value = ConfigValue(dict(configs))
    # parameter_name is filled in later, per parameter
    context_proto = ParameterContext(
        flow_name, user_name, None, echo, datastore_type, config_value
    )
class DelayedEvaluationParameter(object):
    """
    Small wrapper that postpones parameter "conversion" until the
    `_set_constants` function in FlowSpec.

    Click normally converts a parameter as soon as the command line option is
    processed. For some parameters, like IncludeFile, that is too early since
    it would trigger the upload of the file too soon. When a parameter
    converts to a DelayedEvaluationParameter through the usual click
    mechanisms, `_set_constants` knows to invoke `__call__` on it, without any
    argument. Schedulers use the `return_str` argument when they need to turn
    a DelayedEvaluationParameter into a string to store it.
    """

    def __init__(self, name, field, fun):
        self._fun = fun
        self._field = field
        self._name = name

    def __call__(self, return_str=False):
        try:
            result = self._fun(return_str=return_str)
        except Exception:
            # Any failure of the wrapped function is reported against the
            # parameter and field it belongs to.
            raise ParameterFieldFailed(self._name, self._field)
        return result
class Parameter(object):
    """
    Defines a parameter for a flow.

    Parameters must be instantiated as class variables in flow classes, e.g.
    ```
    class MyFlow(FlowSpec):
        param = Parameter('myparam')
    ```
    in this case, the parameter is specified on the command line as
    ```
    python myflow.py run --myparam=5
    ```
    and its value is accessible through a read-only artifact like this:
    ```
    print(self.param == 5)
    ```
    Note that the user-visible parameter name, `myparam` above, can be
    different from the artifact name, `param` above.

    The parameter value is converted to a Python type based on the `type`
    argument or to match the type of `default`, if it is set.

    Parameters
    ----------
    name : str
        User-visible parameter name.
    default : Union[str, float, int, bool, Dict[str, Any],
                Callable[
                    [ParameterContext], Union[str, float, int, bool, Dict[str, Any]]
                ],
            ], optional, default None
        Default value for the parameter. Use a special `JSONType` class to
        indicate that the value must be a valid JSON object. A function
        implies that the parameter corresponds to a *deploy-time parameter*.
        The type of the default value is used as the parameter `type`.
    type : Type, default None
        If `default` is not specified, define the parameter type. Specify
        one of `str`, `float`, `int`, `bool`, or `JSONType`. If None, defaults
        to the type of `default` or `str` if none specified.
    help : str, optional, default None
        Help text to show in `run --help`.
    required : bool, optional, default None
        Require that the user specifies a value for the parameter. Note that if
        a default is provided, the required flag is ignored.
        A value of None is equivalent to False.
    show_default : bool, optional, default None
        If True, show the default value in the help text. A value of None is equivalent
        to True.
    """

    # Read by `add_custom_parameters`, which skips parameters where this is truthy.
    IS_CONFIG_PARAMETER = False

    def __init__(
        self,
        name: str,
        default: Optional[
            Union[
                str,
                float,
                int,
                bool,
                Dict[str, Any],
                Callable[
                    [ParameterContext], Union[str, float, int, bool, Dict[str, Any]]
                ],
            ]
        ] = None,
        type: Optional[
            Union[Type[str], Type[float], Type[int], Type[bool], JSONTypeClass]
        ] = None,
        help: Optional[str] = None,
        required: Optional[bool] = None,
        show_default: Optional[bool] = None,
        **kwargs: Dict[str, Any],
    ):
        """
        Record the parameter definition without resolving anything.

        The explicitly named arguments are kept apart in `_override_kwargs`;
        they are only merged into `kwargs` later, by `init`.
        """
        self.name = name
        self.kwargs = kwargs
        self._override_kwargs = {
            "default": default,
            "type": type,
            "help": help,
            "required": required,
            "show_default": show_default,
        }

    def init(self, ignore_errors=False):
        """
        Resolve and validate the parameter definition.

        Resolves delayed (configuration) values in `kwargs`, applies the
        non-None explicit arguments on top, fills in `required` and
        `show_default`, infers the type, rejects reserved names and
        function-valued fields that do not support functions, wraps a callable
        default in a `DeployTimeField`, and extracts `separator`.

        Parameters
        ----------
        ignore_errors : bool, default False
            Forwarded to `unpack_delayed_evaluator` and
            `resolve_delayed_evaluator`.

        Raises
        ------
        MetaflowException
            If the name is reserved, if `show_default`, `separator` or
            `required` is a function, or if a separator is given for a
            parameter that is not of string type.
        """
        # Prevent circular import
        from .user_configs.config_parameters import (
            resolve_delayed_evaluator,
            unpack_delayed_evaluator,
        )

        # Resolve any value from configurations
        self.kwargs, _ = unpack_delayed_evaluator(
            self.kwargs, ignore_errors=ignore_errors
        )
        # Do it one item at a time so errors are ignored at that level (as opposed to
        # at the entire kwargs level)
        self.kwargs = {
            k: resolve_delayed_evaluator(v, ignore_errors=ignore_errors, to_dict=True)
            for k, v in self.kwargs.items()
        }

        # This was the behavior before configs: values specified in args would override
        # stuff in kwargs which is what we implement here as well
        for key, value in self._override_kwargs.items():
            if value is not None:
                self.kwargs[key] = resolve_delayed_evaluator(
                    value, ignore_errors=ignore_errors, to_dict=True
                )
        # Set two default values if no-one specified them
        self.kwargs.setdefault("required", False)
        self.kwargs.setdefault("show_default", True)

        # Continue processing kwargs free of any configuration values :)

        # TODO: check that the type is one of the supported types
        # The type is inferred here, before a callable default is wrapped below.
        param_type = self.kwargs["type"] = self._get_type(self.kwargs)

        reserved_params = [
            "params",
            "with",
            "tag",
            "namespace",
            "obj",
            "tags",
            "decospecs",
            "run-id-file",
            "max-num-splits",
            "max-workers",
            "max-log-size",
            "user-namespace",
            "run-id",
            "task-id",
            "runner-attribute-file",
        ]
        reserved = set(reserved_params)
        # due to the way Click maps cli args to function args we also want to add underscored params to the set
        for param in reserved_params:
            reserved.add(param.replace("-", "_"))

        if self.name in reserved:
            raise MetaflowException(
                "Parameter name '%s' is a reserved "
                "word. Please use a different "
                "name for your parameter." % (self.name)
            )

        # make sure the user is not trying to pass a function in one of the
        # fields that don't support function-values yet
        for field in ("show_default", "separator", "required"):
            if callable(self.kwargs.get(field)):
                raise MetaflowException(
                    "Parameter *%s*: Field '%s' cannot "
                    "have a function as its value" % (self.name, field)
                )

        # default can be defined as a function
        default_field = self.kwargs.get("default")
        if callable(default_field) and not isinstance(default_field, DeployTimeField):
            self.kwargs["default"] = DeployTimeField(
                self.name,
                param_type,
                "default",
                self.kwargs["default"],
                return_str=True,
            )

        # note that separator doesn't work with DeployTimeFields unless you
        # specify type=str
        # `separator` is removed from kwargs, so it is not part of what
        # `option_kwargs` returns.
        self.separator = self.kwargs.pop("separator", None)
        if self.separator and not self.is_string_type:
            raise MetaflowException(
                "Parameter *%s*: Separator is only allowed "
                "for string parameters." % self.name
            )

    def __repr__(self):
        return "metaflow.Parameter(name=%s, kwargs=%s)" % (self.name, self.kwargs)

    def __str__(self):
        return "metaflow.Parameter(name=%s, kwargs=%s)" % (self.name, self.kwargs)

    def option_kwargs(self, deploy_mode):
        """
        Return the keyword arguments used to build the command line option.

        When the default is a `DeployTimeField` and `deploy_mode` is falsy, a
        copy of the kwargs is returned in which the help text gets a
        "[default: deploy-time value of ...]" suffix, `default` is None and
        `required` is False. Otherwise `self.kwargs` itself is returned, not a
        copy.
        """
        kwargs = self.kwargs
        if isinstance(kwargs.get("default"), DeployTimeField) and not deploy_mode:
            ret = dict(kwargs)
            help_msg = kwargs.get("help")
            help_msg = "" if help_msg is None else help_msg
            ret["help"] = help_msg + "[default: deploy-time value of '%s']" % self.name
            ret["default"] = None
            ret["required"] = False
            return ret
        else:
            return kwargs

    def load_parameter(self, v):
        """Return `v` unchanged."""
        return v

    def _get_type(self, kwargs):
        """
        Determine the parameter type from `kwargs`.

        An explicit "type" entry wins. Otherwise the type of a default that is
        neither None nor callable is used, and `str` is the final fallback.
        """
        default_type = str
        default = kwargs.get("default")
        if default is not None and not callable(default):
            default_type = type(default)
        return kwargs.get("type", default_type)

    @property
    def is_string_type(self):
        """
        True when the "type" entry (defaulting to `str`) equals `str` and the
        "default" entry (defaulting to "") is an instance of `strtype`.
        """
        return self.kwargs.get("type", str) == str and isinstance(
            self.kwargs.get("default", ""), strtype
        )

    # this is needed to appease Pylint for JSONType'd parameters,
    # which may do self.param['foobar']
    def __getitem__(self, x):
        pass
def add_custom_parameters(deploy_mode=False):
    """
    Build a decorator that prepends the current flow's parameters, as click
    options, to a command.

    `deploy_mode` determines whether deploy-time functions should or should
    not be evaluated for this command; it is forwarded to each parameter's
    `option_kwargs`.
    """

    def wrapper(cmd):
        # Remember the command's own options the first time it is wrapped.
        if not hasattr(cmd, "original_params"):
            cmd.original_params = list(cmd.params)
        cmd.has_flow_params = True

        flow_cls = getattr(current_flow, "flow_cls", None)
        if flow_cls is None:
            return cmd

        flow_params = [
            param
            for _, param in flow_cls._get_parameters()
            if not param.IS_CONFIG_PARAMETER
        ]
        # Walk the parameters backwards while inserting at the front so that
        # cmd.params lists options in the order they are defined in the
        # FlowSpec subclass.
        for param in reversed(flow_params):
            option_kwargs = param.option_kwargs(deploy_mode)
            option = click.Option(("--" + param.name,), **option_kwargs)
            cmd.params.insert(0, option)
        return cmd

    return wrapper
# Module-level instance of JSONTypeClass. The Parameter docstring refers to
# `JSONType` as the value to pass as a parameter `type` for JSON values.
JSONType = JSONTypeClass()
================================================
FILE: metaflow/plugins/__init__.py
================================================
import sys
from metaflow.extension_support.plugins import (
merge_lists,
process_plugins,
resolve_plugins,
)
# Add new CLI commands here
# Each entry is a (name, dotted path) tuple. The leading "." suggests the path
# is relative to this package -- TODO confirm against process_plugins.
CLIS_DESC = [
    ("package", ".package_cli.cli"),
    ("batch", ".aws.batch.batch_cli.cli"),
    ("kubernetes", ".kubernetes.kubernetes_cli.cli"),
    ("step-functions", ".aws.step_functions.step_functions_cli.cli"),
    ("airflow", ".airflow.airflow_cli.cli"),
    ("argo-workflows", ".argo.argo_workflows_cli.cli"),
    ("card", ".cards.card_cli.cli"),
    ("tag", ".tag_cli.cli"),
    ("spot-metadata", ".kubernetes.spot_metadata_cli.cli"),
    ("logs", ".logs_cli.cli"),
]

# Add additional commands to the runner here
# These will be accessed using Runner().<command>()
RUNNER_CLIS_DESC = []
from .test_unbounded_foreach_decorator import InternalTestUnboundedForeachInput
# Add new step decorators here
# Each entry is a (decorator name, dotted path to the decorator class) tuple.
# Names ending in "_internal" presumably are not meant to be applied by users
# directly -- TODO confirm.
STEP_DECORATORS_DESC = [
    ("catch", ".catch_decorator.CatchDecorator"),
    ("timeout", ".timeout_decorator.TimeoutDecorator"),
    ("environment", ".environment_decorator.EnvironmentDecorator"),
    ("secrets", ".secrets.secrets_decorator.SecretsDecorator"),
    ("parallel", ".parallel_decorator.ParallelDecorator"),
    ("retry", ".retry_decorator.RetryDecorator"),
    ("resources", ".resources_decorator.ResourcesDecorator"),
    ("batch", ".aws.batch.batch_decorator.BatchDecorator"),
    ("kubernetes", ".kubernetes.kubernetes_decorator.KubernetesDecorator"),
    (
        "argo_workflows_internal",
        ".argo.argo_workflows_decorator.ArgoWorkflowsInternalDecorator",
    ),
    (
        "step_functions_internal",
        ".aws.step_functions.step_functions_decorator.StepFunctionsInternalDecorator",
    ),
    (
        "unbounded_test_foreach_internal",
        ".test_unbounded_foreach_decorator.InternalTestUnboundedForeachDecorator",
    ),
    ("card", ".cards.card_decorator.CardDecorator"),
    ("pytorch_parallel", ".frameworks.pytorch.PytorchParallelDecorator"),
    ("airflow_internal", ".airflow.airflow_decorator.AirflowInternalDecorator"),
    ("pypi", ".pypi.pypi_decorator.PyPIStepDecorator"),
    ("conda", ".pypi.conda_decorator.CondaStepDecorator"),
]
# Add new flow decorators here
# Every entry here becomes a class-level flow decorator.
# Add an entry here if you need a new flow-level annotation. Be
# careful with the choice of name though - they become top-level
# imports from the metaflow package.
# Note: this list is extended further down in this module with the Airflow
# sensor decorators (SENSOR_FLOW_DECORATORS).
FLOW_DECORATORS_DESC = [
    ("schedule", ".aws.step_functions.schedule_decorator.ScheduleDecorator"),
    ("project", ".project_decorator.ProjectDecorator"),
    ("trigger", ".events_decorator.TriggerDecorator"),
    ("trigger_on_finish", ".events_decorator.TriggerOnFinishDecorator"),
    ("pypi_base", ".pypi.pypi_decorator.PyPIFlowDecorator"),
    ("conda_base", ".pypi.conda_decorator.CondaFlowDecorator"),
    ("exit_hook", ".exit_hook.exit_hook_decorator.ExitHookDecorator"),
]

# Add environments here
ENVIRONMENTS_DESC = [
    ("conda", ".pypi.conda_environment.CondaEnvironment"),
    ("pypi", ".pypi.pypi_environment.PyPIEnvironment"),
    ("uv", ".uv.uv_environment.UVEnvironment"),
]

# Add metadata providers here
METADATA_PROVIDERS_DESC = [
    ("service", ".metadata_providers.service.ServiceMetadataProvider"),
    ("local", ".metadata_providers.local.LocalMetadataProvider"),
    ("spin", ".metadata_providers.spin.SpinMetadataProvider"),
]

# Add datastore here
DATASTORES_DESC = [
    ("local", ".datastores.local_storage.LocalStorage"),
    ("spin", ".datastores.spin_storage.SpinStorage"),
    ("s3", ".datastores.s3_storage.S3Storage"),
    ("azure", ".datastores.azure_storage.AzureStorage"),
    ("gs", ".datastores.gs_storage.GSStorage"),
]

# Dataclients are used for IncludeFile
DATACLIENTS_DESC = [
    ("local", ".datatools.Local"),
    ("s3", ".datatools.S3"),
    ("azure", ".azure.includefile_support.Azure"),
    ("gs", ".gcp.includefile_support.GS"),
]
# Add non monitoring/logging sidecars here
# Paths here come in three forms: a single leading ".", a leading "..", and a
# fully qualified "metaflow." path. How each form is resolved is decided by
# process_plugins/resolve_plugins -- TODO confirm the exact rules there.
SIDECARS_DESC = [
    (
        "save_logs_periodically",
        "..mflog.save_logs_periodically.SaveLogsPeriodicallySidecar",
    ),
    (
        "spot_termination_monitor",
        ".kubernetes.spot_monitor_sidecar.SpotTerminationMonitorSidecar",
    ),
    ("heartbeat", "metaflow.metadata_provider.heartbeat.MetadataHeartBeat"),
]

# Add logging sidecars here
LOGGING_SIDECARS_DESC = [
    ("debugLogger", ".debug_logger.DebugEventLogger"),
    ("nullSidecarLogger", "metaflow.event_logger.NullEventLogger"),
]

# Add monitor sidecars here
MONITOR_SIDECARS_DESC = [
    ("debugMonitor", ".debug_monitor.DebugMonitor"),
    ("nullSidecarMonitor", "metaflow.monitor.NullMonitor"),
]

# Add AWS client providers here
AWS_CLIENT_PROVIDERS_DESC = [("boto3", ".aws.aws_client.Boto3ClientProvider")]

# Add Airflow sensor related flow decorators
SENSOR_FLOW_DECORATORS = [
    ("airflow_external_task_sensor", ".airflow.sensors.ExternalTaskSensorDecorator"),
    ("airflow_s3_key_sensor", ".airflow.sensors.S3KeySensorDecorator"),
]

# The sensor decorators are registered as ordinary flow decorators by
# extending FLOW_DECORATORS_DESC in place.
FLOW_DECORATORS_DESC += SENSOR_FLOW_DECORATORS
# Secrets providers, as (provider name, dotted path to the provider class) tuples.
SECRETS_PROVIDERS_DESC = [
    ("inline", ".secrets.inline_secrets_provider.InlineSecretsProvider"),
    (
        "aws-secrets-manager",
        ".aws.secrets_manager.aws_secrets_manager_secrets_provider.AwsSecretsManagerSecretsProvider",
    ),
    (
        "gcp-secret-manager",
        ".gcp.gcp_secret_manager_secrets_provider.GcpSecretManagerSecretsProvider",
    ),
    (
        "az-key-vault",
        ".azure.azure_secret_manager_secrets_provider.AzureKeyVaultSecretsProvider",
    ),
]

# GCP client providers
GCP_CLIENT_PROVIDERS_DESC = [
    ("gcp-default", ".gcp.gs_storage_client_factory.GcpDefaultClientProvider")
]

# Azure client providers
AZURE_CLIENT_PROVIDERS_DESC = [
    ("azure-default", ".azure.azure_credential.AzureDefaultClientProvider")
]

# Deployer implementations
DEPLOYER_IMPL_PROVIDERS_DESC = [
    ("argo-workflows", ".argo.argo_workflows_deployer.ArgoWorkflowsDeployer"),
    (
        "step-functions",
        ".aws.step_functions.step_functions_deployer.StepFunctionsDeployer",
    ),
]

# Top-level ("TL") plugins. `_import_tl_plugins` in this module copies the
# resolved entries into a caller-supplied globals dictionary under these names.
TL_PLUGINS_DESC = [
    ("yaml_parser", ".parsers.yaml_parser"),
    ("requirements_txt_parser", ".pypi.parsers.requirements_txt_parser"),
    ("namespaced_event_name", ".namespaced_events.namespaced_event_name"),
    ("pyproject_toml_parser", ".pypi.parsers.pyproject_toml_parser"),
    ("conda_environment_yml_parser", ".pypi.parsers.conda_environment_yml_parser"),
]

# Hand this module's globals to process_plugins. It runs after every *_DESC
# list above has been defined; presumably it reads those lists -- TODO confirm.
process_plugins(globals())
def get_plugin_cli():
    """Return the result of resolving the "cli" plugin category."""
    return resolve_plugins("cli")
def get_plugin_cli_path():
    """Return the result of resolving the "cli" plugin category with `path_only=True`."""
    return resolve_plugins("cli", path_only=True)
def get_runner_cli():
    """Return the result of resolving the "runner_cli" plugin category."""
    return resolve_plugins("runner_cli")
def get_runner_cli_path():
    """Return the result of resolving the "runner_cli" plugin category with `path_only=True`."""
    return resolve_plugins("runner_cli", path_only=True)
# Resolve each plugin category once, at import time, into module-level names.
STEP_DECORATORS = resolve_plugins("step_decorator")
FLOW_DECORATORS = resolve_plugins("flow_decorator")
ENVIRONMENTS = resolve_plugins("environment")
METADATA_PROVIDERS = resolve_plugins("metadata_provider")
DATASTORES = resolve_plugins("datastore")
DATACLIENTS = resolve_plugins("dataclient")
SIDECARS = resolve_plugins("sidecar")
LOGGING_SIDECARS = resolve_plugins("logging_sidecar")
MONITOR_SIDECARS = resolve_plugins("monitor_sidecar")

# Fold the logging and monitor sidecars into SIDECARS so it holds all three kinds.
SIDECARS.update(LOGGING_SIDECARS)
SIDECARS.update(MONITOR_SIDECARS)

AWS_CLIENT_PROVIDERS = resolve_plugins("aws_client_provider")
SECRETS_PROVIDERS = resolve_plugins("secrets_provider")
AZURE_CLIENT_PROVIDERS = resolve_plugins("azure_client_provider")
GCP_CLIENT_PROVIDERS = resolve_plugins("gcp_client_provider")

# DEPLOYER_IMPL_PROVIDERS is only defined on Python 3.7 and newer; on older
# interpreters the name does not exist in this module.
if sys.version_info >= (3, 7):
    DEPLOYER_IMPL_PROVIDERS = resolve_plugins("deployer_impl_provider")

TL_PLUGINS = resolve_plugins("tl_plugin")
from .cards.card_modules import MF_EXTERNAL_CARDS
# Cards; due to the way cards were designed, it is harder to make them fit
# in the resolve_plugins mechanism. This should be OK because it is unlikely that
# cards will need to be *removed*. No card should be too specific (for example, no
# card should be something just for Airflow, or Argo or step-functions -- those should
# be added externally).
from .cards.card_modules.basic import (
BlankCard,
DefaultCard,
DefaultCardJSON,
ErrorCard,
TaskSpecCard,
)
from .cards.card_modules.test_cards import (
TestEditableCard,
TestEditableCard2,
TestErrorCard,
TestMockCard,
TestNonEditableCard,
TestPathSpecCard,
TestTimeoutCard,
TestRefreshCard,
TestRefreshComponentCard,
TestImageCard,
)
# Built-in card classes. Each class is listed exactly once: the original list
# named BlankCard twice, which left a redundant duplicate entry in CARDS.
CARDS = [
    DefaultCard,
    TaskSpecCard,
    ErrorCard,
    BlankCard,
    TestErrorCard,
    TestTimeoutCard,
    TestMockCard,
    TestPathSpecCard,
    TestEditableCard,
    TestEditableCard2,
    TestNonEditableCard,
    DefaultCardJSON,
    TestRefreshCard,
    TestRefreshComponentCard,
    TestImageCard,
]
# Combine the built-in cards with the externally provided ones. The "type"
# argument presumably names the attribute used to match cards against each
# other -- TODO confirm against merge_lists.
merge_lists(CARDS, MF_EXTERNAL_CARDS, "type")
def _import_tl_plugins(globals_dict):
    """Copy every resolved top-level plugin into `globals_dict`, keyed by plugin name."""
    globals_dict.update(TL_PLUGINS.items())
================================================
FILE: metaflow/plugins/airflow/__init__.py
================================================
================================================
FILE: metaflow/plugins/airflow/airflow.py
================================================
import json
import os
import random
import string
import sys
from datetime import datetime, timedelta
from io import BytesIO
import metaflow.util as util
from metaflow import current
from metaflow.decorators import flow_decorators
from metaflow.exception import MetaflowException
from metaflow.includefile import FilePathClass
from metaflow.metaflow_config import (
AIRFLOW_KUBERNETES_CONN_ID,
AIRFLOW_KUBERNETES_KUBECONFIG_CONTEXT,
AIRFLOW_KUBERNETES_KUBECONFIG_FILE,
AIRFLOW_KUBERNETES_STARTUP_TIMEOUT_SECONDS,
AWS_SECRETS_MANAGER_DEFAULT_REGION,
GCP_SECRET_MANAGER_PREFIX,
AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
CARD_AZUREROOT,
CARD_GSROOT,
CARD_S3ROOT,
DATASTORE_SYSROOT_AZURE,
DATASTORE_SYSROOT_GS,
DATASTORE_SYSROOT_S3,
DATATOOLS_S3ROOT,
DEFAULT_SECRETS_BACKEND_TYPE,
KUBERNETES_SECRETS,
KUBERNETES_SERVICE_ACCOUNT,
S3_ENDPOINT_URL,
SERVICE_HEADERS,
SERVICE_INTERNAL_URL,
AZURE_KEY_VAULT_PREFIX,
)
from metaflow.metaflow_config_funcs import config_values
from metaflow.parameters import (
DelayedEvaluationParameter,
JSONTypeClass,
deploy_time_eval,
)
# TODO: Move chevron to _vendor
from metaflow.plugins.cards.card_modules import chevron
from metaflow.plugins.kubernetes.kubernetes import Kubernetes
from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
from metaflow.util import compress_list, dict_to_cli_options, get_username
from . import airflow_utils
from .airflow_utils import AIRFLOW_MACROS, TASK_ID_XCOM_KEY, AirflowTask, Workflow
from .exception import AirflowException
from .sensors import SUPPORTED_SENSORS
# Path of the `dag.py` file located in the same directory as this module.
# Judging by the name it is the DAG template used at deploy time -- TODO confirm.
AIRFLOW_DEPLOY_TEMPLATE_FILE = os.path.join(os.path.dirname(__file__), "dag.py")
class Airflow(object):
TOKEN_STORAGE_ROOT = "mf.airflow"
def __init__(
    self,
    name,
    graph,
    flow,
    code_package_metadata,
    code_package_sha,
    code_package_url,
    metadata,
    flow_datastore,
    environment,
    event_logger,
    monitor,
    production_token,
    tags=None,
    namespace=None,
    username=None,
    max_workers=None,
    worker_pool=None,
    description=None,
    file_path=None,
    workflow_timeout=None,
    is_paused_upon_creation=True,
):
    """
    Store the deployment inputs and compute the derived attributes.

    Every argument is stored on the instance. Four attributes are derived:
    `graph_structure` (second element of `graph.output_steps()`), `schedule`,
    `parameters` and `contains_foreach`.
    """
    self.name = name
    self.graph = graph
    self.flow = flow
    self.code_package_metadata = code_package_metadata
    self.code_package_sha = code_package_sha
    self.code_package_url = code_package_url
    self.metadata = metadata
    self.flow_datastore = flow_datastore
    self.environment = environment
    self.event_logger = event_logger
    self.monitor = monitor
    self.tags = tags
    self.namespace = namespace  # this is the username space
    self.username = username
    self.max_workers = max_workers
    self.description = description
    self._depends_on_upstream_sensors = False
    self._file_path = file_path
    # Only the second value returned by output_steps() is kept.
    _, self.graph_structure = self.graph.output_steps()
    self.worker_pool = worker_pool
    self.is_paused_upon_creation = is_paused_upon_creation
    self.workflow_timeout = workflow_timeout
    # The calls below read self.flow, so they must run after it is assigned above.
    self.schedule = self._get_schedule()
    self.parameters = self._process_parameters()
    self.production_token = production_token
    # NOTE(review): _contains_foreach is defined outside this view; presumably
    # it inspects self.graph -- confirm.
    self.contains_foreach = self._contains_foreach()
@classmethod
def get_existing_deployment(cls, name, flow_datastore):
_backend = flow_datastore._storage_impl
token_exists = _backend.is_file([cls.get_token_path(name)])
if not token_exists[0]:
return None
with _backend.load_bytes([cls.get_token_path(name)]) as get_results:
for _, path, _ in get_results:
if path is not None:
with open(path, "r") as f:
data = json.loads(f.read())
return (data["owner"], data["production_token"])
@classmethod
def get_token_path(cls, name):
return os.path.join(cls.TOKEN_STORAGE_ROOT, name)
@classmethod
def save_deployment_token(cls, owner, name, token, flow_datastore):
_backend = flow_datastore._storage_impl
_backend.save_bytes(
[
(
cls.get_token_path(name),
BytesIO(
bytes(
json.dumps({"production_token": token, "owner": owner}),
"utf-8",
)
),
)
],
overwrite=False,
)
def _get_schedule(self):
# Using the cron presets provided here :
# https://airflow.apache.org/docs/apache-airflow/stable/dag-run.html?highlight=schedule%20interval#cron-presets
schedule = self.flow._flow_decorators.get("schedule")
if not schedule:
return None
schedule = schedule[0]
if schedule.attributes["cron"]:
return schedule.attributes["cron"]
elif schedule.attributes["weekly"]:
return "@weekly"
elif schedule.attributes["hourly"]:
return "@hourly"
elif schedule.attributes["daily"]:
return "@daily"
return None
def _get_retries(self, node):
max_user_code_retries = 0
max_error_retries = 0
foreach_default_retry = 1
# Different decorators may have different retrying strategies, so take
# the max of them.
for deco in node.decorators:
user_code_retries, error_retries = deco.step_task_retry_count()
max_user_code_retries = max(max_user_code_retries, user_code_retries)
max_error_retries = max(max_error_retries, error_retries)
parent_is_foreach = any( # The immediate parent is a foreach node.
self.graph[n].type == "foreach" for n in node.in_funcs
)
if parent_is_foreach:
max_user_code_retries + foreach_default_retry
return max_user_code_retries, max_user_code_retries + max_error_retries
def _get_retry_delay(self, node):
retry_decos = [deco for deco in node.decorators if deco.name == "retry"]
if len(retry_decos) > 0:
retry_mins = retry_decos[0].attributes["minutes_between_retries"]
return timedelta(minutes=int(retry_mins))
return None
def _process_parameters(self):
    """
    Convert the flow's parameters into a list of Airflow parameter dicts.

    Each dict always has `name`. It has `default` when the deploy-time
    evaluated default is not None, `description` when help text is set, and
    `type` when the default is an int, str, bool or float. Boolean defaults
    are stored as strings and typed as "string".

    Returns
    -------
    List[Dict[str, Any]]
        One dict per flow parameter, in the order the flow yields them.
    """
    type_transform_dict = {
        int.__name__: "integer",
        str.__name__: "string",
        bool.__name__: "string",
        float.__name__: "number",
    }
    airflow_params = []
    for _, param in self.flow._get_parameters():
        # Airflow requires defaults set for parameters.
        value = deploy_time_eval(param.kwargs.get("default"))
        # Setting airflow related param args.
        airflow_param = dict(
            name=param.name,
        )
        if value is not None:
            airflow_param["default"] = value
        if param.kwargs.get("help"):
            airflow_param["description"] = param.kwargs.get("help")

        # `deploy_time_eval` resolved the default to an actual value, so its
        # type can be inferred from that value. This avoids parsing/identifying
        # types like `JSONType` or `FilePathClass` which are returned by
        # calling `param.kwargs.get("type")`.
        # The type is taken from `value` rather than `airflow_param["default"]`
        # because that key is absent when the default resolves to None, and
        # indexing it raised a KeyError. A None default simply gets no type.
        param_type_name = getattr(type(value), "__name__", None)
        # extract the name of the type and resolve the type-name
        # compatible with Airflow.
        if param_type_name in type_transform_dict:
            airflow_param["type"] = type_transform_dict[param_type_name]
            if param_type_name == bool.__name__:
                airflow_param["default"] = str(airflow_param["default"])

        airflow_params.append(airflow_param)

    return airflow_params
def _compress_input_path(
    self,
    steps,
):
    """
    Build a single compressed input-path string for several parent steps.

    `metaflow.util.compress_list` is deliberately not used here:
    `AIRFLOW_MACROS.RUN_ID` is a complicated macro string containing delimiter
    characters, which `metaflow.util.decompress_list` cannot handle. The result
    is instead the run-id macro, a colon, and the comma-joined task-id-only
    input paths of `steps`.
    """
    task_paths = [self._make_input_path(step, only_task_id=True) for step in steps]
    return "%s:%s" % (AIRFLOW_MACROS.RUN_ID, ",".join(task_paths))
def _make_foreach_input_path(self, step_name):
    """
    Build the input path for `step_name` in which the task ids pulled from
    xcom (key `TASK_ID_XCOM_KEY`) are passed through the `join_list` filter.
    """
    template = (
        "%s/%s/:{{ task_instance.xcom_pull(task_ids='%s',key='%s') | join_list }}"
    )
    return template % (
        AIRFLOW_MACROS.RUN_ID,
        step_name,
        step_name,
        TASK_ID_XCOM_KEY,
    )
def _make_input_path(self, step_name, only_task_id=False):
    """
    Build the input path pointing at the task of `step_name`.

    This is set using the `airflow_internal` decorator to help pass state.
    The path pulls the `TASK_ID_XCOM_KEY` xcom, which holds task ids and is
    set via the `MetaflowKubernetesOperator`. With `only_task_id=True` the
    leading run-id macro is left out.
    """
    step_and_task = "/%s/{{ task_instance.xcom_pull(task_ids='%s',key='%s') }}" % (
        step_name,
        step_name,
        TASK_ID_XCOM_KEY,
    )
    if not only_task_id:
        return "%s%s" % (AIRFLOW_MACROS.RUN_ID, step_and_task)
    return step_and_task
def _to_job(self, node):
    """
    This function will transform the node's specification into Airflow compatible operator arguments.
    Since this function is long, below is the summary of the two major duties it performs:
        1. Based on the type of the graph node (start/linear/foreach/join etc.)
            it will decide how to set the input paths
        2. Based on node's decorator specification convert the information into
            a job spec for the KubernetesPodOperator.

    Returns the dictionary of operator arguments. Raises AirflowException for
    nodes with `parallel_foreach` set, and IndexError if the node carries no
    decorator named "kubernetes".
    """
    # Add env vars from the optional @environment decorator.
    env_deco = [deco for deco in node.decorators if deco.name == "environment"]
    env = {}
    if env_deco:
        # Copy so the decorator's own dictionary is not modified below.
        env = env_deco[0].attributes["vars"].copy()

    # The below if/else block handles "input paths".
    # Input Paths help manage dataflow across the graph.
    if node.name == "start":
        # POSSIBLE_FUTURE_IMPROVEMENT:
        # We can extract metadata about the possible upstream sensor triggers.
        # There is a previous commit (7bdf6) in the `airflow` branch that has `SensorMetaExtractor` class and
        # associated MACRO we have built to handle this case if a metadata regarding the sensor is needed.
        # Initialize parameters for the flow in the `start` step.
        # `start` step has no upstream input dependencies aside from
        # parameters.

        if len(self.parameters):
            env["METAFLOW_PARAMETERS"] = AIRFLOW_MACROS.PARAMETERS
        input_paths = None
    else:
        # If it is not the start node then we check if there are many paths
        # converging into it or a single path. Based on that we set the INPUT_PATHS
        if node.parallel_foreach:
            raise AirflowException(
                "Parallel steps are not supported yet with Airflow."
            )
        is_foreach_join = (
            node.type == "join"
            and self.graph[node.split_parents[-1]].type == "foreach"
        )
        if is_foreach_join:
            input_paths = self._make_foreach_input_path(node.in_funcs[0])

        elif len(node.in_funcs) == 1:
            # set input paths where this is only one parent node
            # The parent-task-id is passed via the xcom; There is no other way to get that.
            # One key thing about xcoms is that they are immutable and only accepted if the task
            # doesn't fail.
            # From airflow docs :
            # "Note: If the first task run is not succeeded then on every retry task
            # XComs will be cleared to make the task run idempotent."
            input_paths = self._make_input_path(node.in_funcs[0])
        else:
            # this is a split scenario where there can be more than one input paths.
            input_paths = self._compress_input_path(node.in_funcs)

        # The input paths are not exported through the environment; they are
        # handed to `_step_cli` further down instead.
        # env["METAFLOW_INPUT_PATHS"] = input_paths

    env["METAFLOW_CODE_URL"] = self.code_package_url
    env["METAFLOW_FLOW_NAME"] = self.flow.name
    env["METAFLOW_STEP_NAME"] = node.name
    env["METAFLOW_OWNER"] = self.username

    metadata_env = self.metadata.get_runtime_environment("airflow")
    env.update(metadata_env)

    metaflow_version = self.environment.get_environment_info()
    metaflow_version["flow_name"] = self.graph.name
    metaflow_version["production_token"] = self.production_token
    env["METAFLOW_VERSION"] = json.dumps(metaflow_version)

    # Temporary passing of *some* environment variables. Do not rely on this
    # mechanism as it will be removed in the near future
    env.update(
        {
            k: v
            for k, v in config_values()
            if k.startswith("METAFLOW_CONDA_") or k.startswith("METAFLOW_DEBUG_")
        }
    )

    # Extract the k8s decorators for constructing the arguments of the K8s Pod Operator on Airflow.
    # Indexing with [0] raises IndexError when the node has no "kubernetes" decorator.
    k8s_deco = [deco for deco in node.decorators if deco.name == "kubernetes"][0]
    user_code_retries, _ = self._get_retries(node)
    retry_delay = self._get_retry_delay(node)
    # This sets timeouts for @timeout decorators.
    # The timeout is set as "execution_timeout" for an airflow task.
    runtime_limit = get_run_time_limit_for_task(node.decorators)

    k8s = Kubernetes(self.flow_datastore, self.metadata, self.environment)
    user = util.get_username()

    labels = {
        "app": "metaflow",
        "app.kubernetes.io/name": "metaflow-task",
        "app.kubernetes.io/part-of": "metaflow",
        "app.kubernetes.io/created-by": user,
        # Question to (savin) : Should we have username set over here for created by since it is the
        # airflow installation that is creating the jobs.
        # Technically the "user" is the stakeholder but should these labels be present.
    }
    # These are applied last (see env.update below), so they take precedence
    # over anything set in `env` earlier. METAFLOW_CODE_URL appears both here
    # and above, with the same value.
    additional_mf_variables = {
        "METAFLOW_CODE_METADATA": self.code_package_metadata,
        "METAFLOW_CODE_SHA": self.code_package_sha,
        "METAFLOW_CODE_URL": self.code_package_url,
        "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
        "METAFLOW_USER": user,
        "METAFLOW_SERVICE_URL": SERVICE_INTERNAL_URL,
        "METAFLOW_SERVICE_HEADERS": json.dumps(SERVICE_HEADERS),
        "METAFLOW_DATASTORE_SYSROOT_S3": DATASTORE_SYSROOT_S3,
        "METAFLOW_DATATOOLS_S3ROOT": DATATOOLS_S3ROOT,
        "METAFLOW_DEFAULT_DATASTORE": self.flow_datastore.TYPE,
        "METAFLOW_DEFAULT_METADATA": "service",
        "METAFLOW_KUBERNETES_WORKLOAD": str(
            1
        ),  # This is used by kubernetes decorator.
        "METAFLOW_RUNTIME_ENVIRONMENT": "kubernetes",
        "METAFLOW_CARD_S3ROOT": CARD_S3ROOT,
        "METAFLOW_RUN_ID": AIRFLOW_MACROS.RUN_ID,
        "METAFLOW_AIRFLOW_TASK_ID": AIRFLOW_MACROS.create_task_id(
            self.contains_foreach
        ),
        "METAFLOW_AIRFLOW_DAG_RUN_ID": AIRFLOW_MACROS.AIRFLOW_RUN_ID,
        "METAFLOW_AIRFLOW_JOB_ID": AIRFLOW_MACROS.AIRFLOW_JOB_ID,
        "METAFLOW_PRODUCTION_TOKEN": self.production_token,
        "METAFLOW_ATTEMPT_NUMBER": AIRFLOW_MACROS.ATTEMPT,
        # GCP stuff
        "METAFLOW_DATASTORE_SYSROOT_GS": DATASTORE_SYSROOT_GS,
        "METAFLOW_CARD_GSROOT": CARD_GSROOT,
        "METAFLOW_S3_ENDPOINT_URL": S3_ENDPOINT_URL,
    }
    env["METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT"] = (
        AZURE_STORAGE_BLOB_SERVICE_ENDPOINT
    )
    env["METAFLOW_DATASTORE_SYSROOT_AZURE"] = DATASTORE_SYSROOT_AZURE
    env["METAFLOW_CARD_AZUREROOT"] = CARD_AZUREROOT
    # Secrets-related settings are only exported when they are configured.
    if DEFAULT_SECRETS_BACKEND_TYPE:
        env["METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE"] = DEFAULT_SECRETS_BACKEND_TYPE
    if AWS_SECRETS_MANAGER_DEFAULT_REGION:
        env["METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"] = (
            AWS_SECRETS_MANAGER_DEFAULT_REGION
        )
    if GCP_SECRET_MANAGER_PREFIX:
        env["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = GCP_SECRET_MANAGER_PREFIX

    if AZURE_KEY_VAULT_PREFIX:
        env["METAFLOW_AZURE_KEY_VAULT_PREFIX"] = AZURE_KEY_VAULT_PREFIX

    env.update(additional_mf_variables)

    # Decorator values win; otherwise fall back to the configured service
    # account and to the "default" namespace.
    service_account = (
        KUBERNETES_SERVICE_ACCOUNT
        if k8s_deco.attributes["service_account"] is None
        else k8s_deco.attributes["service_account"]
    )
    k8s_namespace = (
        k8s_deco.attributes["namespace"]
        if k8s_deco.attributes["namespace"] is not None
        else "default"
    )
    qos_requests, qos_limits = qos_requests_and_limits(
        k8s_deco.attributes["qos"],
        k8s_deco.attributes["cpu"],
        k8s_deco.attributes["memory"],
        k8s_deco.attributes["disk"],
    )
    # The inner dict comprehension iterates over a single dummy element so
    # that its `if` clause can leave the GPU entry out entirely.
    # NOTE(review): `.lower()` is applied to the literal "%s.com/gpu" before
    # the `%` formatting, so the gpu_vendor value itself is inserted unchanged
    # (not lower-cased) -- confirm whether that is intended.
    resources = dict(
        requests=qos_requests,
        limits={
            **qos_limits,
            **{
                "%s.com/gpu".lower()
                % k8s_deco.attributes["gpu_vendor"]: str(k8s_deco.attributes["gpu"])
                for k in [0]
                # Don't set GPU limits if gpu isn't specified.
                if k8s_deco.attributes["gpu"] is not None
            },
        },
    )

    annotations = {
        "metaflow/production_token": self.production_token,
        "metaflow/owner": self.username,
        "metaflow/user": self.username,
        "metaflow/flow_name": self.flow.name,
    }
    if current.get("project_name"):
        annotations.update(
            {
                "metaflow/project_name": current.project_name,
                "metaflow/branch_name": current.branch_name,
                "metaflow/project_flow_name": current.project_flow_name,
            }
        )

    k8s_operator_args = dict(
        # like argo workflows we use step_name as name of container
        name=node.name,
        namespace=k8s_namespace,
        service_account_name=service_account,
        node_selector=k8s_deco.attributes["node_selector"],
        cmds=k8s._command(
            self.flow.name,
            AIRFLOW_MACROS.RUN_ID,
            node.name,
            AIRFLOW_MACROS.create_task_id(self.contains_foreach),
            AIRFLOW_MACROS.ATTEMPT,
            code_package_metadata=self.code_package_metadata,
            code_package_url=self.code_package_url,
            step_cmds=self._step_cli(
                node, input_paths, self.code_package_url, user_code_retries
            ),
        ),
        annotations=annotations,
        image=k8s_deco.attributes["image"],
        resources=resources,
        execution_timeout=dict(seconds=runtime_limit),
        retries=user_code_retries,
        # Environment entries whose value is None are dropped.
        env_vars=[dict(name=k, value=v) for k, v in env.items() if v is not None],
        labels=labels,
        task_id=node.name,
        startup_timeout_seconds=AIRFLOW_KUBERNETES_STARTUP_TIMEOUT_SECONDS,
        get_logs=True,
        do_xcom_push=True,
        log_events_on_failure=True,
        is_delete_operator_pod=True,
        retry_exponential_backoff=False,  # todo : should this be a arg we allow on CLI. not right now - there is an open ticket for this - maybe at some point we will.
        reattach_on_restart=False,
        secrets=[],
    )
    # `in_cluster` starts out True and is switched off as soon as any explicit
    # connection id, kubeconfig context or kubeconfig file is configured.
    k8s_operator_args["in_cluster"] = True
    if AIRFLOW_KUBERNETES_CONN_ID is not None:
        k8s_operator_args["kubernetes_conn_id"] = AIRFLOW_KUBERNETES_CONN_ID
        k8s_operator_args["in_cluster"] = False
    if AIRFLOW_KUBERNETES_KUBECONFIG_CONTEXT is not None:
        k8s_operator_args["cluster_context"] = AIRFLOW_KUBERNETES_KUBECONFIG_CONTEXT
        k8s_operator_args["in_cluster"] = False
    if AIRFLOW_KUBERNETES_KUBECONFIG_FILE is not None:
        k8s_operator_args["config_file"] = AIRFLOW_KUBERNETES_KUBECONFIG_FILE
        k8s_operator_args["in_cluster"] = False

    # Decorator secrets may be a comma-separated string or a list.
    if k8s_deco.attributes["secrets"]:
        if isinstance(k8s_deco.attributes["secrets"], str):
            k8s_operator_args["secrets"] = k8s_deco.attributes["secrets"].split(",")
        elif isinstance(k8s_deco.attributes["secrets"], list):
            # NOTE(review): this stores the decorator's own list object, so the
            # `+=` below extends the decorator's list in place -- confirm that
            # this aliasing is acceptable.
            k8s_operator_args["secrets"] = k8s_deco.attributes["secrets"]
    if len(KUBERNETES_SECRETS) > 0:
        k8s_operator_args["secrets"] += KUBERNETES_SECRETS.split(",")

    if retry_delay:
        k8s_operator_args["retry_delay"] = dict(seconds=retry_delay.total_seconds())

    return k8s_operator_args
def _step_cli(self, node, paths, code_package_url, user_code_retries):
    """
    Build the shell command strings that execute the step `node`.

    For the `start` step an additional, guarded command is emitted first: it
    initializes the special `_parameters` task (only when a `dump` of that
    task fails) and `paths` is replaced by the pathspec of that task. The
    last entry of the result is always the `step` command itself.

    Parameters
    ----------
    node :
        Graph node of the step; `name`, `decorators` and `in_funcs` are read.
    paths :
        Value passed to `--input-paths` (overridden for the `start` step).
    code_package_url :
        Not referenced inside this method.
    user_code_retries : int
        Formatted with `%d` into `--max-user-code-retries`.

    Returns
    -------
    list of str
        One command string per entry, in execution order.
    """
    cmds = []
    script_name = os.path.basename(sys.argv[0])
    executable = self.environment.executable(node.name)
    entrypoint = [executable, script_name]
    # Re-apply, via `--with`, the decorators that were neither statically
    # defined nor inserted by another decorator.
    top_opts_dict = {
        "with": [
            decorator.make_decorator_spec()
            for decorator in node.decorators
            if not decorator.statically_defined and decorator.inserted_by is None
        ]
    }
    # FlowDecorators can define their own top-level options. They are
    # responsible for adding their own top-level options and values through
    # the get_top_level_options() hook. See similar logic in runtime.py.
    for deco in flow_decorators(self.flow):
        top_opts_dict.update(deco.get_top_level_options())
    top_opts = list(dict_to_cli_options(top_opts_dict))
    top_level = top_opts + [
        "--quiet",
        "--metadata=%s" % self.metadata.TYPE,
        "--environment=%s" % self.environment.TYPE,
        "--datastore=%s" % self.flow_datastore.TYPE,
        "--datastore-root=%s" % self.flow_datastore.datastore_root,
        "--event-logger=%s" % self.event_logger.TYPE,
        "--monitor=%s" % self.monitor.TYPE,
        "--no-pylint",
        "--with=airflow_internal",
    ]
    if node.name == "start":
        # We need a separate unique ID for the special _parameters task
        task_id_params = "%s-params" % AIRFLOW_MACROS.create_task_id(
            self.contains_foreach
        )
        # Export user-defined parameters into runtime environment
        # (the file name is 10 random lowercase letters).
        param_file = "".join(
            random.choice(string.ascii_lowercase) for _ in range(10)
        )
        # Write the parameters to `param_file` with the set_parameters module
        # and then source that file in the current shell.
        export_params = (
            "python -m "
            "metaflow.plugins.airflow.plumbing.set_parameters %s "
            "&& . `pwd`/%s" % (param_file, param_file)
        )
        # The `init` command that creates the _parameters task.
        params = (
            entrypoint
            + top_level
            + [
                "init",
                "--run-id %s" % AIRFLOW_MACROS.RUN_ID,
                "--task-id %s" % task_id_params,
            ]
        )
        # Assign tags to run objects.
        if self.tags:
            params.extend("--tag %s" % tag for tag in self.tags)
        # If the start step gets retried, we must be careful not to
        # regenerate multiple parameters tasks. Hence, we check first if
        # _parameters exists already.
        exists = entrypoint + [
            # Dump the parameters task
            "dump",
            "--max-value-size=0",
            "%s/_parameters/%s" % (AIRFLOW_MACROS.RUN_ID, task_id_params),
        ]
        # Only export + init when the `dump` probe exits non-zero.
        cmd = "if ! %s >/dev/null 2>/dev/null; then %s && %s; fi" % (
            " ".join(exists),
            export_params,
            " ".join(params),
        )
        cmds.append(cmd)
        # The start step takes the _parameters task as its only input path.
        paths = "%s/_parameters/%s" % (AIRFLOW_MACROS.RUN_ID, task_id_params)
    step = [
        "step",
        node.name,
        "--run-id %s" % AIRFLOW_MACROS.RUN_ID,
        "--task-id %s" % AIRFLOW_MACROS.create_task_id(self.contains_foreach),
        "--retry-count %s" % AIRFLOW_MACROS.ATTEMPT,
        "--max-user-code-retries %d" % user_code_retries,
        "--input-paths %s" % paths,
    ]
    if self.tags:
        step.extend("--tag %s" % tag for tag in self.tags)
    if self.namespace is not None:
        step.append("--namespace=%s" % self.namespace)
    parent_is_foreach = any(  # The immediate parent is a foreach node.
        self.graph[n].type == "foreach" for n in node.in_funcs
    )
    if parent_is_foreach:
        # Children of a foreach receive their split index from the template.
        step.append("--split-index %s" % AIRFLOW_MACROS.FOREACH_SPLIT_INDEX)
    cmds.append(" ".join(entrypoint + top_level + step))
    return cmds
def _collect_flow_sensors(self):
    """
    Create one Airflow task per sensor flow-decorator attached to the flow.

    Every entry of `SUPPORTED_SENSORS` is looked up by name in the flow's
    decorator registry; each decorator found contributes `create_task()`.
    When at least one task is produced, `_depends_on_upstream_sensors` is
    set to True on this object.

    Returns
    -------
    list
        The created sensor tasks (possibly empty).
    """
    # First gather the decorator lists that are actually present ...
    present = []
    for sensor in SUPPORTED_SENSORS:
        attached = self.flow._flow_decorators.get(sensor.name)
        if attached is not None:
            present.append(attached)
    # ... then turn every decorator into its task.
    af_tasks = []
    for attached in present:
        for deco in attached:
            af_tasks.append(deco.create_task())
    if af_tasks:
        self._depends_on_upstream_sensors = True
    return af_tasks
def _contains_foreach(self):
for node in self.graph:
if node.type == "foreach":
return True
return False
def compile(self):
    """
    Compile the flow into the text of an Airflow DAG file.

    Flows carrying `trigger`, `trigger_on_finish` or `exit_hook` flow
    decorators are rejected. Otherwise the graph is walked from `start`,
    one `AirflowTask` is added to a `Workflow` per step, sensor tasks (if
    any) are added, and the resulting dictionary is rendered through
    `_to_airflow_dag_file`.

    Returns
    -------
    The value returned by `self._to_airflow_dag_file(...)`.

    Raises
    ------
    AirflowException
        For unsupported flow decorators, unsupported @kubernetes attributes
        or unsupported node types.
    """
    if self.flow._flow_decorators.get("trigger") or self.flow._flow_decorators.get(
        "trigger_on_finish"
    ):
        raise AirflowException(
            "Deploying flows with @trigger or @trigger_on_finish decorator(s) "
            "to Airflow is not supported currently."
        )
    if self.flow._flow_decorators.get("exit_hook"):
        raise AirflowException(
            "Deploying flows with the @exit_hook decorator "
            "to Airflow is not currently supported."
        )

    # Visit every node of the flow and recursively build the state machine.
    # NOTE(review): `exit_node` is accepted but never read or passed on.
    def _visit(node, workflow, exit_node=None):
        # Copy of the attributes of the step's first "kubernetes" decorator;
        # this indexing raises IndexError if the step has none.
        kube_deco = dict(
            [deco for deco in node.decorators if deco.name == "kubernetes"][
                0
            ].attributes
        )
        if kube_deco:
            # Reject every @kubernetes attribute listed below when it is set
            # to a truthy value.
            for attr in [
                "use_tmpfs",
                "tmpfs_size",
                "persistent_volume_claims",
                "image_pull_policy",
            ]:
                if kube_deco[attr]:
                    raise AirflowException(
                        "The decorator attribute *%s* is currently not supported on Airflow "
                        "for the @kubernetes decorator on step *%s*"
                        % (attr, node.name)
                    )
        parent_is_foreach = any(  # Any immediate parent is a foreach node.
            self.graph[n].type == "foreach" for n in node.in_funcs
        )
        state = AirflowTask(
            node.name, is_mapper_node=parent_is_foreach
        ).set_operator_args(**self._to_job(node))
        if node.type == "end":
            workflow.add_state(state)
        # Continue linear assignment within the (sub)workflow if the node
        # doesn't branch or fork.
        elif node.type in ("start", "linear", "join", "foreach"):
            workflow.add_state(state)
            _visit(
                self.graph[node.out_funcs[0]],
                workflow,
            )
        elif node.type == "split":
            # Visit every branch of the split.
            workflow.add_state(state)
            for func in node.out_funcs:
                _visit(
                    self.graph[func],
                    workflow,
                )
        else:
            raise AirflowException(
                "Node type *%s* for step *%s* "
                "is not currently supported by "
                "Airflow." % (node.type, node.name)
            )
        return workflow

    # set max active tasks here , For more info check here :
    # https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/models/dag/index.html#airflow.models.dag.DAG
    airflow_dag_args = (
        {} if self.max_workers is None else dict(max_active_tasks=self.max_workers)
    )
    airflow_dag_args["is_paused_upon_creation"] = self.is_paused_upon_creation
    # workflow timeout should only be enforced if a dag is scheduled.
    if self.workflow_timeout is not None and self.schedule is not None:
        airflow_dag_args["dagrun_timeout"] = dict(seconds=self.workflow_timeout)
    appending_sensors = self._collect_flow_sensors()
    workflow = Workflow(
        dag_id=self.name,
        default_args=self._create_defaults(),
        description=self.description,
        schedule_interval=self.schedule,
        # `start_date` is a mandatory argument even though the documentation lists it as optional value
        # Based on the code, Airflow will throw a `AirflowException` when `start_date` is not provided
        # to a DAG : https://github.com/apache/airflow/blob/0527a0b6ce506434a23bc2a6f5ddb11f492fc614/airflow/models/dag.py#L2170
        start_date=datetime.now(),
        tags=self.tags,
        file_path=self._file_path,
        graph_structure=self.graph_structure,
        metadata=dict(
            contains_foreach=self.contains_foreach, flow_name=self.flow.name
        ),
        **airflow_dag_args
    )
    workflow = _visit(self.graph["start"], workflow)
    workflow.set_parameters(self.parameters)
    if len(appending_sensors) > 0:
        for s in appending_sensors:
            workflow.add_state(s)
        # Put all sensors, each as its own single-element list, at the front
        # of the graph structure.
        workflow.graph_structure.insert(0, [[s.name] for s in appending_sensors])
    return self._to_airflow_dag_file(workflow.to_dict())
def _to_airflow_dag_file(self, json_dag):
    """
    Render the DAG deployment template for `json_dag`.

    The template at `AIRFLOW_DEPLOY_TEMPLATE_FILE` is rendered with three
    values: `config` (the given `json_dag`, passed through unchanged),
    `utils` (the full source text of the `airflow_utils` module) and
    `deployed_on` (the current local time as a string).

    Returns
    -------
    The result of `chevron.render`.
    """
    with open(airflow_utils.__file__) as utils_handle:
        utils_source = utils_handle.read()
    with open(AIRFLOW_DEPLOY_TEMPLATE_FILE) as template_handle:
        template_text = template_handle.read()
    render_context = dict(
        config=json_dag,
        utils=utils_source,
        deployed_on=str(datetime.now()),
    )
    return chevron.render(template_text, render_context)
def _create_defaults(self):
    """
    Build the `default_args` dictionary handed to the workflow.

    Always contains `owner`, `depends_on_past`, `execution_timeout` and
    `retry_delay`; `pool` is added only when `self.worker_pool` is set.
    """
    defaults = dict(
        owner=get_username(),
        # When True, a task would not run if its previous run had failed.
        depends_on_past=False,
        # TODO: Enable emails
        execution_timeout=timedelta(days=5),
        # See `retry_delay` in the Airflow BaseOperator documentation.
        retry_delay=timedelta(seconds=200),
    )
    if self.worker_pool is None:
        return defaults
    defaults["pool"] = self.worker_pool
    return defaults
================================================
FILE: metaflow/plugins/airflow/airflow_cli.py
================================================
import base64
import os
import re
import sys
from hashlib import sha1
from metaflow import current, decorators
from metaflow._vendor import click
from metaflow.exception import MetaflowException, MetaflowInternalError
from metaflow.metaflow_config import FEAT_ALWAYS_UPLOAD_CODE_PACKAGE
from metaflow.package import MetaflowPackage
from metaflow.plugins.aws.step_functions.production_token import (
load_token,
new_token,
store_token,
)
from metaflow.plugins.kubernetes.kubernetes_decorator import KubernetesDecorator
from metaflow.util import get_username, to_bytes, to_unicode
from .airflow import Airflow
from .exception import AirflowException, NotSupportedException
class IncorrectProductionToken(MetaflowException):
    """Raised by `resolve_token` when a re-deployment is not authorized."""

    headline = "Incorrect production token"
# Despite its name, this pattern matches any character that is NOT allowed in
# a DAG name (anything other than letters, digits, `_`, `-` and `.`);
# `resolve_dag_name` rejects a name when `search` finds a match.
VALID_NAME = re.compile(r"[^a-zA-Z0-9_\-\.]")
def resolve_token(
    name, token_prefix, obj, authorize, given_token, generate_new_token, is_project
):
    """
    Decide which production token this deployment uses and cache it locally.

    Parameters
    ----------
    name :
        DAG name used to look up an existing deployment.
    token_prefix :
        Key under which the token is loaded from / stored to the local cache.
    obj :
        CLI state object; `echo` and `flow_datastore` are used.
    authorize :
        Token supplied with --authorize, or None to use the cached token.
    given_token :
        Token supplied with --new-token, if any.
    generate_new_token : bool
        Whether a fresh token was requested.
    is_project : bool
        Whether the flow uses @project (custom tokens are then rejected).

    Returns
    -------
    The token that was selected.

    Raises
    ------
    IncorrectProductionToken
        A different user deployed the flow before and the supplied/cached
        token does not match the previous one.
    MetaflowException, MetaflowInternalError
        For unsupported option combinations or when no token is generated.
    """
    # 1) retrieve the previous deployment, if one exists
    workflow = Airflow.get_existing_deployment(name, obj.flow_datastore)
    if workflow is None:
        obj.echo(
            "It seems this is the first time you are deploying *%s* to "
            "Airflow." % name
        )
        prev_token = None
    else:
        prev_user, prev_token = workflow
    # 2) authorize this deployment
    if prev_token is not None:
        if authorize is None:
            authorize = load_token(token_prefix)
        elif authorize.startswith("production:"):
            # Strip the 11-character "production:" prefix.
            authorize = authorize[11:]
        # we allow the user who deployed the previous version to re-deploy,
        # even if they don't have the token
        if prev_user != get_username() and authorize != prev_token:
            obj.echo(
                "There is an existing version of *%s* on Airflow which was "
                "deployed by the user *%s*." % (name, prev_user)
            )
            obj.echo(
                "To deploy a new version of this flow, you need to use the same "
                "production token that they used. "
            )
            obj.echo(
                "Please reach out to them to get the token. Once you have it, call "
                "this command:"
            )
            obj.echo("    airflow create --authorize MY_TOKEN", fg="green")
            obj.echo(
                'See "Organizing Results" at docs.metaflow.org for more information '
                "about production tokens."
            )
            raise IncorrectProductionToken(
                "Try again with the correct production token."
            )
    # 3) do we need a new token or should we use the existing token?
    if given_token:
        if is_project:
            # we rely on a known prefix for @project tokens, so we can't
            # allow the user to specify a custom token with an arbitrary prefix
            raise MetaflowException(
                "--new-token is not supported for @projects. Use --generate-new-token "
                "to create a new token."
            )
        if given_token.startswith("production:"):
            given_token = given_token[11:]
        token = given_token
        # NOTE(review): unlike the generated-token branch below, this branch
        # does not call `Airflow.save_deployment_token` - confirm intended.
        obj.echo("")
        obj.echo("Using the given token, *%s*." % token)
    elif prev_token is None or generate_new_token:
        token = new_token(token_prefix, prev_token)
        if token is None:
            if prev_token is None:
                raise MetaflowInternalError(
                    "We could not generate a new token. This is unexpected. "
                )
            else:
                raise MetaflowException(
                    "--generate-new-token option is not supported after using "
                    "--new-token. Use --new-token to make a new namespace."
                )
        obj.echo("")
        obj.echo("A new production token generated.")
        Airflow.save_deployment_token(get_username(), name, token, obj.flow_datastore)
    else:
        # Neither a custom nor a new token was requested: keep the old one.
        token = prev_token
    obj.echo("")
    obj.echo("The namespace of this production flow is")
    obj.echo("    production:%s" % token, fg="green")
    obj.echo(
        "To analyze results of this production flow add this line in your notebooks:"
    )
    obj.echo('    namespace("production:%s")' % token, fg="green")
    obj.echo(
        "If you want to authorize other people to deploy new versions of this flow to "
        "Airflow, they need to call"
    )
    obj.echo("    airflow create --authorize %s" % token, fg="green")
    obj.echo("when deploying this flow to Airflow for the first time.")
    obj.echo(
        'See "Organizing Results" at https://docs.metaflow.org/ for more '
        "information about production tokens."
    )
    obj.echo("")
    # Cache the token locally under `token_prefix`.
    store_token(token_prefix, token)
    return token
@click.group()
def cli():
    # Root click group; the `airflow` command group below is registered on it.
    # (Documented with a comment on purpose: a docstring here would be picked
    # up by click as the group's help text.)
    pass
@cli.group(help="Commands related to Airflow.")
@click.option(
    "--name",
    default=None,
    type=str,
    help="Airflow DAG name. The flow name is used instead if this option is not "
    "specified",
)
@click.pass_obj
def airflow(obj, name=None):
    # Runs before every `airflow` sub-command: checks the flow through
    # `obj.check` and stores the resolved DAG name, token prefix and
    # @project flag on `obj` for the sub-commands to use.
    obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
    obj.dag_name, obj.token_prefix, obj.is_project = resolve_dag_name(name)
@airflow.command(help="Compile a new version of this flow to Airflow DAG.")
@click.argument("file", required=True)
@click.option(
    "--authorize",
    default=None,
    help="Authorize using this production token. You need this "
    "when you are re-deploying an existing flow for the first "
    "time. The token is cached in METAFLOW_HOME, so you only "
    "need to specify this once.",
)
@click.option(
    "--generate-new-token",
    is_flag=True,
    help="Generate a new production token for this flow. "
    "This will move the production flow to a new namespace.",
)
@click.option(
    "--new-token",
    "given_token",
    default=None,
    help="Use the given production token for this flow. "
    "This will move the production flow to the given namespace.",
)
@click.option(
    "--tag",
    "tags",
    multiple=True,
    default=None,
    help="Annotate all objects produced by Airflow DAG executions "
    "with the given tag. You can specify this option multiple "
    "times to attach multiple tags.",
)
@click.option(
    "--is-paused-upon-creation",
    default=False,
    is_flag=True,
    help="Generated Airflow DAG is paused/unpaused upon creation.",
)
@click.option(
    "--namespace",
    "user_namespace",
    default=None,
    # TODO (savin): Identify the default namespace?
    help="Change the namespace from the default to the given tag. "
    "See run --help for more information.",
)
@click.option(
    "--max-workers",
    default=100,
    show_default=True,
    help="Maximum number of parallel processes.",
)
@click.option(
    "--workflow-timeout",
    default=None,
    type=int,
    help="Workflow timeout in seconds. Enforced only for scheduled DAGs.",
)
@click.option(
    "--worker-pool",
    default=None,
    show_default=True,
    help="Worker pool for Airflow DAG execution.",
)
@click.pass_obj
def create(
    obj,
    file,
    authorize=None,
    generate_new_token=False,
    given_token=None,
    tags=None,
    is_paused_upon_creation=False,
    user_namespace=None,
    max_workers=None,
    workflow_timeout=None,
    worker_pool=None,
):
    # Validates the flow, resolves the production token, compiles the flow
    # and writes the generated DAG source to `file`.
    #
    # Refuse to overwrite the flow script itself with the generated DAG.
    if os.path.abspath(sys.argv[0]) == os.path.abspath(file):
        raise MetaflowException(
            "Airflow DAG file name cannot be the same as flow file name"
        )
    # Validate if the workflow is correctly parsed.
    _validate_workflow(
        obj.flow, obj.graph, obj.flow_datastore, obj.metadata, workflow_timeout
    )
    obj.echo("Compiling *%s* to Airflow DAG..." % obj.dag_name, bold=True)
    token = resolve_token(
        obj.dag_name,
        obj.token_prefix,
        obj,
        authorize,
        given_token,
        generate_new_token,
        obj.is_project,
    )
    flow = make_flow(
        obj,
        obj.dag_name,
        token,
        tags,
        is_paused_upon_creation,
        user_namespace,
        max_workers,
        workflow_timeout,
        worker_pool,
        file,
    )
    # The output file is opened (and truncated) before `compile()` runs.
    with open(file, "w") as f:
        f.write(flow.compile())
    obj.echo(
        "DAG *{dag_name}* "
        "for flow *{name}* compiled to "
        "Airflow successfully.\n".format(dag_name=obj.dag_name, name=current.flow_name),
        bold=True,
    )
def make_flow(
    obj,
    dag_name,
    production_token,
    tags,
    is_paused_upon_creation,
    namespace,
    max_workers,
    workflow_timeout,
    worker_pool,
    file,
):
    """
    Prepare the flow for Airflow and return the `Airflow` compiler object.

    Steps, in order: attach the @kubernetes decorator to the flow, refresh
    `obj.graph` from the flow, build the code package (stored on
    `obj.package`), obtain the package URL and SHA, and construct `Airflow`
    with the CLI options given here.
    """
    # Attach @kubernetes.
    decorators._attach_decorators(obj.flow, [KubernetesDecorator.name])
    decorators._process_late_attached_decorator(
        [KubernetesDecorator.name],
        obj.flow,
        obj.graph,
        obj.environment,
        obj.flow_datastore,
        obj.logger,
    )
    obj.graph = obj.flow._graph

    # Save the code package in the flow datastore so that both user code and
    # metaflow package can be retrieved during workflow execution.
    package_datastore = None
    if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE:
        package_datastore = obj.flow_datastore
    obj.package = MetaflowPackage(
        obj.flow,
        obj.environment,
        obj.echo,
        suffixes=obj.package_suffixes,
        flow_datastore=package_datastore,
    )

    if not FEAT_ALWAYS_UPLOAD_CODE_PACKAGE:
        # Upload the package blob ourselves and take the single result.
        saved = obj.flow_datastore.save_data([obj.package.blob], len_hint=1)
        package_url, package_sha = saved[0]
    else:
        # This blocks until the package is created
        package_url = obj.package.package_url()
        package_sha = obj.package.package_sha()

    deployment_options = dict(
        tags=tags,
        namespace=namespace,
        username=get_username(),
        max_workers=max_workers,
        worker_pool=worker_pool,
        workflow_timeout=workflow_timeout,
        description=obj.flow.__doc__,
        file_path=file,
        is_paused_upon_creation=is_paused_upon_creation,
    )
    return Airflow(
        dag_name,
        obj.graph,
        obj.flow,
        obj.package.package_metadata,
        package_sha,
        package_url,
        obj.metadata,
        obj.flow_datastore,
        obj.environment,
        obj.event_logger,
        obj.monitor,
        production_token,
        **deployment_options
    )
def _validate_foreach_constraints(graph):
    """
    Reject foreach layouts that cannot be deployed to Airflow.

    Walks the graph from `start` and raises `NotSupportedException` when a
    foreach step is itself inside a foreach, or when the tracked
    `foreach_stack` grows beyond two entries (the foreach step plus one
    linear step inside it).
    """

    def traverse_graph(node, state):
        # `state` is one dict shared by the whole traversal, including all
        # branches of a split.
        if node.type == "foreach" and node.is_inside_foreach:
            raise NotSupportedException(
                "Step *%s* is a foreach step called within a foreach step. "
                "This type of graph is currently not supported with Airflow."
                % node.name
            )
        if node.type == "foreach":
            # Start a new stack with the foreach step as its first entry.
            state["foreach_stack"] = [node.name]
        if node.type in ("start", "linear", "join", "foreach"):
            if node.type == "linear" and node.is_inside_foreach:
                state["foreach_stack"].append(node.name)
            if "foreach_stack" in state and len(state["foreach_stack"]) > 2:
                raise NotSupportedException(
                    "The foreach step *%s* created by step *%s* needs to have an immediate join step. "
                    "Step *%s* is invalid since it is a linear step with a foreach. "
                    "This type of graph is currently not supported with Airflow."
                    % (
                        state["foreach_stack"][1],
                        state["foreach_stack"][0],
                        state["foreach_stack"][-1],
                    )
                )
            # These node types are followed along their first out-edge only.
            traverse_graph(graph[node.out_funcs[0]], state)
        elif node.type == "split":
            for func in node.out_funcs:
                traverse_graph(graph[func], state)
        # Any other node type (e.g. "end") stops the recursion.

    traverse_graph(graph["start"], {})
def _validate_workflow(flow, graph, flow_datastore, metadata, workflow_timeout):
    """
    Check that the flow can be deployed with `airflow create`.

    Parameters
    ----------
    flow :
        The flow; its parameters and flow decorators are inspected.
    graph :
        The flow graph; every node's decorators are inspected.
    flow_datastore :
        Its `TYPE` must be one of the supported datastores.
    metadata, workflow_timeout :
        Accepted for interface compatibility; not inspected here.

    Raises
    ------
    MetaflowException
        A parameter name is duplicated (case-insensitively) or a parameter
        has no default value.
    NotSupportedException
        Unsupported foreach layout, or a step uses @batch or @slurm.
    AirflowException
        @parallel is used, the datastore type is unsupported, or the
        schedule specifies a timezone.
    """
    seen = set()
    for _, param in flow._get_parameters():
        # Throw an exception if the parameter is specified twice.
        norm = param.name.lower()
        if norm in seen:
            raise MetaflowException(
                "Parameter *%s* is specified twice. "
                "Note that parameter names are "
                "case-insensitive." % param.name
            )
        seen.add(norm)
        if "default" not in param.kwargs:
            raise MetaflowException(
                "Parameter *%s* does not have a default value. "
                "A default value is required for parameters when deploying flows on Airflow."
                % param.name
            )
    # check for other compute related decorators.
    _validate_foreach_constraints(graph)
    for node in graph:
        if node.parallel_foreach:
            raise AirflowException(
                "Deploying flows with @parallel decorator(s) "
                "to Airflow is not supported currently."
            )
        if any(d.name == "batch" for d in node.decorators):
            raise NotSupportedException(
                "Step *%s* is marked for execution on AWS Batch with Airflow which isn't currently supported."
                % node.name
            )
        if any(d.name == "slurm" for d in node.decorators):
            raise NotSupportedException(
                "Step *%s* is marked for execution on Slurm with Airflow which isn't currently supported."
                % node.name
            )
    SUPPORTED_DATASTORES = ("azure", "s3", "gs")
    if flow_datastore.TYPE not in SUPPORTED_DATASTORES:
        raise AirflowException(
            "Datastore type `%s` is not supported with `airflow create`. "
            "Please choose from datastore of type %s when calling `airflow create`"
            % (
                str(flow_datastore.TYPE),
                # " or " (with a leading space) so the options do not run
                # together as "`azure`or `s3`or `gs`".
                " or ".join(["`%s`" % x for x in SUPPORTED_DATASTORES]),
            )
        )
    schedule = flow._flow_decorators.get("schedule")
    if not schedule:
        return
    # Only the first schedule decorator is inspected.
    if schedule[0].timezone is not None:
        raise AirflowException(
            "`airflow create` does not support scheduling with `timezone`."
        )
def resolve_dag_name(name):
    """
    Work out the DAG name and token prefix for this deployment.

    Without @project, the DAG name is `name` (or the flow name when `name`
    is empty) and doubles as the token prefix. With @project, `name` must
    not be given; the DAG name is `current.project_flow_name` and the token
    prefix is derived from a SHA-1 of "<project>.<branch>".

    Returns
    -------
    tuple
        `(dag_name, lower-cased token_prefix, is_project)`.

    Raises
    ------
    MetaflowException
        `name` was combined with @project, or the name contains a character
        matched by `VALID_NAME`.
    """
    project = current.get("project_name")

    if not project:
        if name and VALID_NAME.search(name):
            raise MetaflowException(
                "Name '%s' contains invalid characters. Please construct a name using regex %s"
                % (name, VALID_NAME.pattern)
            )
        dag_name = name or current.flow_name
        return dag_name, dag_name.lower(), False

    if name:
        raise MetaflowException(
            "--name is not supported for @projects. " "Use --branch instead."
        )
    dag_name = current.project_flow_name
    if dag_name and VALID_NAME.search(dag_name):
        raise MetaflowException(
            "Name '%s' contains invalid characters. Please construct a name using regex %s"
            % (dag_name, VALID_NAME.pattern)
        )
    # Hash "<project>.<branch>" and keep the first 16 base32 characters.
    project_branch = to_bytes(".".join((project, current.branch_name)))
    encoded_digest = to_unicode(base64.b32encode(sha1(project_branch).digest()))
    token_prefix = "mfprj-%s" % encoded_digest[:16]
    return dag_name, token_prefix.lower(), True
================================================
FILE: metaflow/plugins/airflow/airflow_decorator.py
================================================
import json
import os
from metaflow.decorators import StepDecorator
from metaflow.metadata_provider import MetaDatum
from .airflow_utils import (
TASK_ID_XCOM_KEY,
FOREACH_CARDINALITY_XCOM_KEY,
)
# Directory in which `push_xcom_values` writes its `return.json` file.
K8S_XCOM_DIR_PATH = "/airflow/xcom"
def safe_mkdir(dir):
    """Create `dir` (and missing parents); do nothing if it already exists."""
    try:
        os.makedirs(dir)
    except FileExistsError:
        # Already present - treated as success.
        return None
def push_xcom_values(xcom_dict):
    """Write `xcom_dict` as JSON to `return.json` inside `K8S_XCOM_DIR_PATH`."""
    safe_mkdir(K8S_XCOM_DIR_PATH)
    return_file = os.path.join(K8S_XCOM_DIR_PATH, "return.json")
    with open(return_file, "w") as handle:
        handle.write(json.dumps(xcom_dict))
class AirflowInternalDecorator(StepDecorator):
    """
    Step decorator named `airflow_internal`.

    Before a step runs it registers Airflow bookkeeping metadata; after the
    task finishes it writes the XCom values for the task.
    """

    name = "airflow_internal"

    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_user_code_retries,
        ubf_context,
        inputs,
    ):
        """
        Register the Airflow DAG-run id and job id as task metadata.

        Both values are read from the environment
        (`METAFLOW_AIRFLOW_DAG_RUN_ID`, `METAFLOW_AIRFLOW_JOB_ID`); a missing
        variable raises `KeyError`. Each entry is tagged with the attempt id.
        """
        bookkeeping = {
            "airflow-dag-run-id": os.environ["METAFLOW_AIRFLOW_DAG_RUN_ID"],
            "airflow-job-id": os.environ["METAFLOW_AIRFLOW_JOB_ID"],
        }
        entries = []
        for key, value in bookkeeping.items():
            entries.append(
                MetaDatum(
                    field=key,
                    value=value,
                    type=key,
                    tags=["attempt_id:{0}".format(retry_count)],
                )
            )
        # Register book-keeping metadata for debugging.
        metadata.register_metadata(run_id, step_name, task_id, entries)

    def task_finished(
        self, step_name, flow, graph, is_task_ok, retry_count, max_user_code_retries
    ):
        """
        Push the task's XCom values once the task has finished.

        The task id comes from `METAFLOW_AIRFLOW_TASK_ID`; for a foreach step
        the number of splits is included as well. `is_task_ok` is not
        consulted.
        """
        outgoing = {TASK_ID_XCOM_KEY: os.environ["METAFLOW_AIRFLOW_TASK_ID"]}
        if graph[step_name].type == "foreach":
            outgoing[FOREACH_CARDINALITY_XCOM_KEY] = flow._foreach_num_splits
        push_xcom_values(outgoing)
================================================
FILE: metaflow/plugins/airflow/airflow_utils.py
================================================
import hashlib
import json
import sys
import platform
from collections import defaultdict
from datetime import datetime, timedelta
# XCom keys. NOTE(review): FOREACH_XCOM_KEY is not referenced in the portion
# of this module reviewed here - confirm where it is consumed.
TASK_ID_XCOM_KEY = "metaflow_task_id"
FOREACH_CARDINALITY_XCOM_KEY = "metaflow_foreach_cardinality"
FOREACH_XCOM_KEY = "metaflow_foreach_indexes"
# Number of leading md5 hex characters kept by `run_id_creator` and
# `task_id_creator` respectively.
RUN_HASH_ID_LEN = 12
TASK_ID_HASH_LEN = 8
# Prefix placed in front of the hashed ids in the `AIRFLOW_MACROS` templates.
RUN_ID_PREFIX = "airflow"
# Version thresholds used by the compatibility checks in this module.
AIRFLOW_FOREACH_SUPPORT_VERSION = "2.3.0"
AIRFLOW_MIN_SUPPORT_VERSION = "2.2.0"
KUBERNETES_PROVIDER_FOREACH_VERSION = "4.2.0"
class KubernetesProviderNotFound(Exception):
    """Raised when the Airflow Kubernetes provider cannot be imported."""

    headline = "Kubernetes provider not found"
class ForeachIncompatibleException(Exception):
    """Error type for an Airflow version that cannot run Metaflow foreach steps."""

    headline = "Airflow version is incompatible to support Metaflow `foreach`s."
class IncompatibleVersionException(Exception):
    """
    Raised when the installed Airflow is older than
    `AIRFLOW_MIN_SUPPORT_VERSION`; the message names both versions.
    """

    headline = "Metaflow is incompatible with current version of Airflow."

    def __init__(self, version_number) -> None:
        super().__init__(
            "Airflow version %s is incompatible with Metaflow. Metaflow requires Airflow a minimum version %s"
            % (version_number, AIRFLOW_MIN_SUPPORT_VERSION)
        )
class IncompatibleKubernetesProviderVersionException(Exception):
    """
    Raised when the installed Kubernetes provider is older than
    `KUBERNETES_PROVIDER_FOREACH_VERSION`.
    """

    # Built once, when the class is defined, using the running interpreter's
    # path in the suggested pip command.
    headline = (
        "Kubernetes Provider version is incompatible with Metaflow `foreach`s. "
        "Install the provider via "
        "`%s -m pip install apache-airflow-providers-cncf-kubernetes==%s`"
    ) % (sys.executable, KUBERNETES_PROVIDER_FOREACH_VERSION)
class AirflowSensorNotFound(Exception):
    """Raised when the package providing a required sensor cannot be imported."""

    headline = "Sensor package not found"
def create_absolute_version_number(version):
    """
    Collapse a dotted version string into a single comparable number.

    Component `i` (0-based) is weighted by `10 ** (3 - i)`, so "2.3.0"
    becomes 2300. If every component is numeric all of them are used;
    otherwise, if the first two are numeric, only those two are used
    (e.g. "2.3.0rc1" -> 2300). In any other case None is returned.

    Note that the weights overlap once a component reaches 10
    (e.g. "2.10.0" and "3.0.0" both map to 3000).
    """
    components = version.split(".")
    if all(piece.isdigit() for piece in components):
        usable = components
    elif all(piece.isdigit() for piece in components[:2]):
        usable = components[:2]
    else:
        return None

    total = 0
    for position, piece in enumerate(usable):
        total += (10 ** (3 - position)) * int(piece)
    return total
def _validate_dynamic_mapping_compatibility():
    """
    Ensure the installed Airflow is new enough for dynamic task mapping.

    Raises
    ------
    ForeachIncompatibleException
        The installed Airflow version cannot be parsed or is lower than
        `AIRFLOW_FOREACH_SUPPORT_VERSION`.
    """
    from airflow.version import version

    af_ver = create_absolute_version_number(version)
    required_ver = create_absolute_version_number(AIRFLOW_FOREACH_SUPPORT_VERSION)
    if af_ver is None or af_ver < required_ver:
        # The exception must be raised; merely constructing it would let an
        # incompatible installation pass this check silently.
        raise ForeachIncompatibleException(
            "Please install airflow version %s to use Airflow's Dynamic task mapping functionality."
            % AIRFLOW_FOREACH_SUPPORT_VERSION
        )
def get_kubernetes_provider_version():
    """
    Return the first entry of the Kubernetes provider's `versions` list.

    Raises
    ------
    KubernetesProviderNotFound
        The provider's `get_provider_info` cannot be imported.
    """
    try:
        from airflow.providers.cncf.kubernetes.get_provider_info import (
            get_provider_info,
        )
    except ImportError:
        raise KubernetesProviderNotFound(
            "This DAG utilizes `KubernetesPodOperator`. "
            "Install the Airflow Kubernetes provider using "
            "`%s -m pip install apache-airflow-providers-cncf-kubernetes`"
            % sys.executable
        )
    else:
        provider_info = get_provider_info()
        return provider_info["versions"][0]
def _validate_minimum_airflow_version():
    """
    Raise `IncompatibleVersionException` unless the installed Airflow version
    parses and is at least `AIRFLOW_MIN_SUPPORT_VERSION`.
    """
    from airflow.version import version

    installed = create_absolute_version_number(version)
    if installed is not None:
        required = create_absolute_version_number(AIRFLOW_MIN_SUPPORT_VERSION)
        if installed >= required:
            return
    raise IncompatibleVersionException(version)
def _check_foreach_compatible_kubernetes_provider():
    """
    Raise `IncompatibleKubernetesProviderVersionException` unless the
    installed Kubernetes provider version parses and is at least
    `KUBERNETES_PROVIDER_FOREACH_VERSION`.
    """
    installed = create_absolute_version_number(get_kubernetes_provider_version())
    if installed is not None:
        required = create_absolute_version_number(
            KUBERNETES_PROVIDER_FOREACH_VERSION
        )
        if installed >= required:
            return
    raise IncompatibleKubernetesProviderVersionException()
def datetimeparse(isotimestamp):
    """
    Parse an ISO-8601 timestamp string into a `datetime`.

    On Python 3.7 and newer `datetime.fromisoformat` is used; on older
    interpreters the string must match "%Y-%m-%dT%H:%M:%S.%f".
    """
    if sys.version_info[:2] >= (3, 7):
        return datetime.fromisoformat(isotimestamp)
    return datetime.strptime(isotimestamp, "%Y-%m-%dT%H:%M:%S.%f")
def get_xcom_arg_class():
    """Return `airflow.XComArg`, or None when it cannot be imported."""
    try:
        from airflow import XComArg
    except ImportError:
        xcom_arg_cls = None
    else:
        xcom_arg_cls = XComArg
    return xcom_arg_cls
class AIRFLOW_MACROS:
    """
    Jinja template strings that are embedded into the generated DAG, plus
    helpers that pick or combine them.
    """

    # run_id_creator is added via the `user_defined_filters`
    RUN_ID = "%s-{{ [run_id, dag_run.dag_id] | run_id_creator }}" % RUN_ID_PREFIX
    PARAMETERS = "{{ params | json_dump }}"
    STEPNAME = "{{ ti.task_id }}"
    # AIRFLOW_MACROS.TASK_ID works for linear/branched workflows.
    # `ti.task_id` is the step name in Metaflow code.
    # TASK_ID pipes its inputs through the jinja filter `task_id_creator`.
    # Because the run id changes between runs while the step name stays the
    # same, the resulting task id changes per run. Since Airflow doesn't
    # encourage dynamic rewriting of DAGs, steps inside a foreach can be named
    # with indexes (e.g. `stepname-$index`).
    # Hence `foreach`s require some special plumbing.
    # https://stackoverflow.com/questions/62962386/can-an-airflow-task-dynamically-generate-a-dag-at-runtime
    TASK_ID = (
        "%s-{{ [run_id, ti.task_id, dag_run.dag_id] | task_id_creator }}"
        % RUN_ID_PREFIX
    )
    # Same as TASK_ID, with `ti.map_index` added to the hashed values.
    FOREACH_TASK_ID = (
        "%s-{{ [run_id, ti.task_id, dag_run.dag_id, ti.map_index] | task_id_creator }}"
        % RUN_ID_PREFIX
    )
    # Airflow run_ids are of the form : "manual__2022-03-15T01:26:41.186781+00:00"
    # Such run-ids break `metaflow.util.decompress_list`; this is why the
    # run id is hashed.
    # `echo -n` is used because plain `echo` appends a line break, which would
    # change the hash; the value must equal the hash computed in Python.
    RUN_ID_SHELL = (
        "%s-$(echo -n {{ run_id }}-{{ dag_run.dag_id }} | md5sum | awk '{print $1}' | awk '{print substr ($0, 0, %s)}')"
        % (RUN_ID_PREFIX, str(RUN_HASH_ID_LEN))
    )
    # Try number minus one (presumably to obtain a zero-based attempt index -
    # TODO confirm against Airflow's `try_number` semantics).
    ATTEMPT = "{{ task_instance.try_number - 1 }}"
    AIRFLOW_RUN_ID = "{{ run_id }}"
    AIRFLOW_JOB_ID = "{{ ti.job_id }}"
    FOREACH_SPLIT_INDEX = "{{ ti.map_index }}"

    @classmethod
    def create_task_id(cls, is_foreach):
        """Return `FOREACH_TASK_ID` when `is_foreach` is truthy, else `TASK_ID`."""
        if is_foreach:
            return cls.FOREACH_TASK_ID
        else:
            return cls.TASK_ID

    @classmethod
    def pathspec(cls, flowname, is_foreach=False):
        """Return the template "<flowname>/<RUN_ID>/<STEPNAME>/<task id>"."""
        return "%s/%s/%s/%s" % (
            flowname,
            cls.RUN_ID,
            cls.STEPNAME,
            cls.create_task_id(is_foreach),
        )
class SensorNames:
    """Names of the Airflow sensor classes this module knows how to create."""

    EXTERNAL_TASK_SENSOR = "ExternalTaskSensor"
    S3_SENSOR = "S3KeySensor"

    @classmethod
    def get_supported_sensors(cls):
        """
        Return the supported sensor names, in definition order.

        Only public string attributes of the class are included. Returning
        every value of `cls.__dict__` would also include the module name,
        the docstring, this classmethod object and other class internals.
        """
        return [
            value
            for attr, value in vars(cls).items()
            if not attr.startswith("_") and isinstance(value, str)
        ]
def run_id_creator(val):
    """
    Hash the items of `val` into a short run id.

    The items are converted to strings, joined with "-", md5-hashed, and the
    first `RUN_HASH_ID_LEN` hex characters are returned.
    """
    joined = "-".join(str(item) for item in val)
    digest = hashlib.md5(joined.encode("utf-8")).hexdigest()
    return digest[:RUN_HASH_ID_LEN]
def task_id_creator(val):
    """
    Hash the items of `val` into a short task id.

    The items are converted to strings, joined with "-", md5-hashed, and the
    first `TASK_ID_HASH_LEN` hex characters are returned.
    """
    joined = "-".join(str(item) for item in val)
    digest = hashlib.md5(joined.encode("utf-8")).hexdigest()
    return digest[:TASK_ID_HASH_LEN]
def id_creator(val, hash_len):
    """
    Hash the items of `val` into an id of at most `hash_len` characters.

    The items are converted to strings, joined with "-", md5-hashed, and the
    hex digest is sliced to its first `hash_len` characters.
    """
    joined = "-".join(str(item) for item in val)
    digest = hashlib.md5(joined.encode("utf-8")).hexdigest()
    return digest[:hash_len]
def json_dump(val):
    """Return `val` serialized as a JSON string (registered as a jinja filter)."""
    return json.dumps(val)
class AirflowDAGArgs(object):
    """
    Keyword arguments for an Airflow `DAG`, with JSON-friendly
    (de)serialization of `datetime` and `timedelta` values.
    """

    # `_arg_types` maps DAG argument names to their Python types. It is
    # consulted when turning the configuration JSON back into objects. It does
    # not list every DAG argument, only the important ones that can come from
    # the CLI.
    _arg_types = {
        "dag_id": str,
        "description": str,
        "schedule_interval": str,
        "start_date": datetime,
        "catchup": bool,
        "tags": list,
        "dagrun_timeout": timedelta,
        "default_args": {
            "owner": str,
            "depends_on_past": bool,
            "email": list,
            "email_on_failure": bool,
            "email_on_retry": bool,
            "retries": int,
            "retry_delay": timedelta,
            # Queue to target when running this job. Not all executors
            # implement queue management; the CeleryExecutor does.
            "queue": str,
            # Slot pool this task should run in; pools limit concurrency for
            # certain tasks.
            "pool": str,
            "priority_weight": int,
            "wait_for_downstream": bool,
            "sla": timedelta,
            "execution_timeout": timedelta,
            "trigger_rule": str,
        },
    }

    # Reference for user_defined_filters : https://stackoverflow.com/a/70175317
    filters = {
        "task_id_creator": lambda v: task_id_creator(v),
        "json_dump": lambda val: json_dump(val),
        "run_id_creator": lambda val: run_id_creator(val),
        "join_list": lambda x: ",".join(list(x)),
    }

    def __init__(self, **kwargs):
        self._args = kwargs

    @property
    def arguments(self):
        """The stored arguments plus `user_defined_filters`."""
        return dict(**self._args, user_defined_filters=self.filters)

    def serialize(self):
        """
        Return the stored arguments with nested dicts walked recursively,
        `datetime` values as ISO strings and `timedelta` values as
        `{"seconds": <total seconds>}`.
        """

        def _encode(value):
            if isinstance(value, dict):
                return {key: _encode(item) for key, item in value.items()}
            if isinstance(value, datetime):
                return value.isoformat()
            if isinstance(value, timedelta):
                return dict(seconds=value.total_seconds())
            return value

        return _encode(self._args)

    @classmethod
    def deserialize(cls, data_dict):
        """
        Rebuild an instance from `serialize()` output, using `_arg_types` to
        decide which values are parsed back into `datetime` / `timedelta`.
        Keys that `_arg_types` does not list are copied unchanged.
        """

        def _decode(payload, schema):
            decoded = {}
            for key, raw in payload.items():
                if key not in schema:
                    decoded[key] = raw
                    continue
                expected = schema[key]
                if isinstance(raw, dict) and isinstance(expected, dict):
                    decoded[key] = _decode(raw, expected)
                elif expected == datetime:
                    decoded[key] = datetimeparse(raw)
                elif expected == timedelta:
                    decoded[key] = timedelta(**raw)
                else:
                    decoded[key] = raw
            return decoded

        return cls(**_decode(data_dict, cls._arg_types))
def _kubernetes_pod_operator_args(operator_args):
    """
    Convert serialized operator arguments into objects for
    `KubernetesPodOperator`.

    `operator_args` is modified in place and also returned: secret names
    become `Secret` objects, pod-info environment variables are added,
    `resources` is converted to `V1ResourceRequirements` (under the key
    `resources` or `container_resources`, depending on the provider
    version), and `execution_timeout` / `retry_delay` dictionaries become
    `timedelta` objects.

    NOTE(review): `resources["requests"]` is read unconditionally, so the
    caller must always supply a `resources` dict with a `requests` key -
    confirm against the producer of these arguments.
    """
    from kubernetes import client
    from airflow.kubernetes.secret import Secret

    # Set dynamic env variables like run-id, task-id etc from here.
    secrets = [
        Secret("env", secret, secret) for secret in operator_args.get("secrets", [])
    ]
    # `args` is the same dictionary object as `operator_args`, not a copy.
    args = operator_args
    args.update(
        {
            "secrets": secrets,
            # Question for (savin):
            # Default timeout in airflow is 120. I can remove `startup_timeout_seconds` for now. how should we expose it to the user?
        }
    )
    # We need to explicitly add the `client.V1EnvVar` over here because
    # `pod_runtime_info_envs` doesn't accept arguments in dictionary form and strictly
    # Requires objects of type `client.V1EnvVar`
    additional_env_vars = [
        client.V1EnvVar(
            name=k,
            value_from=client.V1EnvVarSource(
                field_ref=client.V1ObjectFieldSelector(field_path=str(v))
            ),
        )
        for k, v in {
            "METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
            "METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
            "METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
            "METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
            "METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
        }.items()
    ]
    args["pod_runtime_info_envs"] = additional_env_vars
    resources = args.get("resources")
    # KubernetesPodOperator version 4.2.0 renamed `resources` to
    # `container_resources` (https://github.com/apache/airflow/pull/24673) / (https://github.com/apache/airflow/commit/45f4290712f5f779e57034f81dbaab5d77d5de85)
    # This was done because `KubernetesPodOperator` didn't play nice with dynamic task mapping and they had to
    # deprecate the `resources` argument. Hence, the below code path checks for the version of `KubernetesPodOperator`
    # and then sets the argument. If the version < 4.2.0 then we set the argument as `resources`.
    # If it is >= 4.2.0 then we set the argument as `container_resources`
    # The `resources` argument of `KubernetesPodOperator` is going to be deprecated soon in the future.
    # So we will only use it for `KubernetesPodOperator` version < 4.2.0
    # The `resources` argument will also not work for `foreach`s.
    provider_version = get_kubernetes_provider_version()
    k8s_op_ver = create_absolute_version_number(provider_version)
    if k8s_op_ver is None or k8s_op_ver < create_absolute_version_number(
        KUBERNETES_PROVIDER_FOREACH_VERSION
    ):
        # Since the provider version is less than `4.2.0` so we need to use the `resources` argument
        # We need to explicitly parse `resources`/`container_resources` to `k8s.V1ResourceRequirements`,
        # otherwise airflow tries to parse dictionaries to `airflow.providers.cncf.kubernetes.backcompat.pod.Resources`
        # object via `airflow.providers.cncf.kubernetes.backcompat.backward_compat_converts.convert_resources` function.
        # This fails many times since the dictionary structure it expects is not the same as
        # `client.V1ResourceRequirements`.
        args["resources"] = client.V1ResourceRequirements(
            requests=resources["requests"],
            limits=None if "limits" not in resources else resources["limits"],
        )
    else:  # the provider version is `4.2.0` or newer, so use the `container_resources` argument
        args["container_resources"] = client.V1ResourceRequirements(
            requests=resources["requests"],
            limits=None if "limits" not in resources else resources["limits"],
        )
        # The old key must not be passed alongside `container_resources`.
        del args["resources"]
    if operator_args.get("execution_timeout"):
        # Serialized as keyword arguments for `timedelta`.
        args["execution_timeout"] = timedelta(
            **operator_args.get(
                "execution_timeout",
            )
        )
    if operator_args.get("retry_delay"):
        args["retry_delay"] = timedelta(**operator_args.get("retry_delay"))
    return args
def _parse_sensor_args(name, kwargs):
    """
    Convert serialized sensor arguments back into the objects Airflow expects.

    Only `SensorNames.EXTERNAL_TASK_SENSOR` needs treatment: an
    `execution_delta` stored as a dict is expanded into a `timedelta`; any
    other value under that key is removed. `kwargs` is modified in place and
    also returned.
    """
    if name != SensorNames.EXTERNAL_TASK_SENSOR or "execution_delta" not in kwargs:
        return kwargs

    delta = kwargs["execution_delta"]
    if type(delta) == dict:
        kwargs["execution_delta"] = timedelta(**delta)
    else:
        del kwargs["execution_delta"]
    return kwargs
def _get_sensor(name):
    """
    Resolve a sensor name to the matching Airflow sensor class.

    The Airflow imports are performed lazily inside the function. Returns
    `None` when `name` is not one of the handled sensor names.

    Raises
    ------
    AirflowSensorNotFound
        If the S3 sensor is requested but the Airflow AWS provider cannot be
        imported.
    """
    if name == SensorNames.EXTERNAL_TASK_SENSOR:
        # ExternalTaskSensor uses the execution_date of a DAG to determine the
        # upstream DAG run to wait for. That date is the exact date the
        # current DAG gets executed on. For example, if "DagA" (upstream DAG)
        # got scheduled at 12 Jan 4:00 PM PDT then the task sensor of "DagB"
        # (current DAG) will look for a "DagA" run that got executed at
        # 12 Jan 4:00 PM PDT **exactly**.
        from airflow.sensors.external_task_sensor import ExternalTaskSensor

        return ExternalTaskSensor

    if name == SensorNames.S3_SENSOR:
        try:
            from airflow.providers.amazon.aws.sensors.s3 import S3KeySensor
        except ImportError:
            raise AirflowSensorNotFound(
                "This DAG requires a `S3KeySensor`. "
                "Install the Airflow AWS provider using : "
                "`pip install apache-airflow-providers-amazon`"
            )
        return S3KeySensor

    return None
def get_metaflow_kubernetes_operator():
    """
    Build and return the `MetaflowKubernetesOperator` class.

    `KubernetesPodOperator` is imported lazily: first from
    `airflow.contrib.operators.kubernetes_pod_operator` and, if that import
    fails, from `airflow.providers.cncf.kubernetes.operators.kubernetes_pod`.
    The subclass is defined inside this function so that importing this module
    does not require the Kubernetes provider.

    Returns
    -------
    type
        The `MetaflowKubernetesOperator` subclass of `KubernetesPodOperator`.

    Raises
    ------
    KubernetesProviderNotFound
        If `KubernetesPodOperator` cannot be imported from either location.
    """
    try:
        from airflow.contrib.operators.kubernetes_pod_operator import (
            KubernetesPodOperator,
        )
    except ImportError:
        try:
            from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import (
                KubernetesPodOperator,
            )
        except ImportError as e:
            raise KubernetesProviderNotFound(
                "This DAG utilizes `KubernetesPodOperator`. "
                "Install the Airflow Kubernetes provider using "
                "`%s -m pip install apache-airflow-providers-cncf-kubernetes`"
                % sys.executable
            )

    class MetaflowKubernetesOperator(KubernetesPodOperator):
        """
        ## Why Inherit the `KubernetesPodOperator` class ?
        Two key reasons :
        1. So that we can override the `execute` method.
        The only change we introduce to the method is to explicitly modify xcom relating to `return_values`.
        We do this so that the `XComArg` object can work with `expand` function.
        2. So that we can introduce a keyword argument named `mapper_arr`.
        This keyword argument can help as a dummy argument for the `KubernetesPodOperator.partial().expand` method. Any Airflow Operator can be dynamically mapped to runtime artifacts using `Operator.partial(**kwargs).expand(**mapper_kwargs)` post the introduction of [Dynamic Task Mapping](https://airflow.apache.org/docs/apache-airflow/stable/concepts/dynamic-task-mapping.html).
        The `expand` function takes keyword arguments taken by the operator.
        ## Why override the `execute` method ?
        When we dynamically map vanilla Airflow operators with artifacts generated at runtime, we need to pass that information via `XComArg` to an operator's keyword argument in the `expand` [function](https://airflow.apache.org/docs/apache-airflow/stable/concepts/dynamic-task-mapping.html#mapping-over-result-of-classic-operators).
        The `XComArg` object retrieves XCom values for a particular task based on a `key`, the default key being `return_values`.
        Oddly dynamic task mapping [doesn't support XCom values from any other key except](https://github.com/apache/airflow/blob/8a34d25049a060a035d4db4a49cd4a0d0b07fb0b/airflow/models/mappedoperator.py#L150) `return_values`
        The values of XCom passed by the `KubernetesPodOperator` are mapped to the `return_values` XCom key.
        The biggest problem this creates is that the values of the Foreach cardinality are stored inside the dictionary of `return_values` and cannot be accessed trivially like : `XComArg(task)['foreach_key']` since they are resolved during runtime.
        This puts us in a bind since the only xcom we can retrieve is the full dictionary and we cannot pass that as the iterable for the mapper tasks.
        Hence, we inherit the `execute` method and push custom xcom keys (needed by downstream tasks such as metaflow taskids) and modify `return_values` captured from the container whenever a foreach related xcom is passed.
        When we encounter a foreach xcom we resolve the cardinality which is passed to an actual list and return that as `return_values`.
        This is later useful in the `Workflow.compile` where the operator's `expand` method is called and we are able to retrieve the xcom value.
        """

        # Extend the parent's templated fields with the Metaflow-specific
        # attributes that are assigned from `AIRFLOW_MACROS` in `__init__`.
        template_fields = KubernetesPodOperator.template_fields + (
            "metaflow_pathspec",
            "metaflow_run_id",
            "metaflow_task_id",
            "metaflow_attempt",
            "metaflow_step_name",
            "metaflow_flow_name",
        )

        def __init__(
            self,
            *args,
            mapper_arr=None,
            flow_name=None,
            flow_contains_foreach=False,
            **kwargs
        ) -> None:
            """
            Store the Metaflow-specific keyword arguments and forward all
            remaining arguments to `KubernetesPodOperator`.

            `mapper_arr` is only stored on the instance; see the class
            docstring for why the keyword exists.
            """
            super().__init__(*args, **kwargs)
            self.mapper_arr = mapper_arr
            self._flow_name = flow_name
            self._flow_contains_foreach = flow_contains_foreach
            self.metaflow_pathspec = AIRFLOW_MACROS.pathspec(
                self._flow_name, is_foreach=self._flow_contains_foreach
            )
            self.metaflow_run_id = AIRFLOW_MACROS.RUN_ID
            self.metaflow_task_id = AIRFLOW_MACROS.create_task_id(
                self._flow_contains_foreach
            )
            self.metaflow_attempt = AIRFLOW_MACROS.ATTEMPT
            self.metaflow_step_name = AIRFLOW_MACROS.STEPNAME
            self.metaflow_flow_name = self._flow_name

        def execute(self, context):
            """
            Run the parent `execute` and post-process its result.

            Returns `None` when the parent returns `None`. Otherwise the value
            stored under `TASK_ID_XCOM_KEY` (if present) is pushed as its own
            XCom, and when `FOREACH_CARDINALITY_XCOM_KEY` is present the
            cardinality is expanded into `list(range(cardinality))` and
            returned. When the foreach key is absent the method falls through
            and returns `None`.
            """
            result = super().execute(context)
            if result is None:
                return
            ti = context["ti"]
            if TASK_ID_XCOM_KEY in result:
                ti.xcom_push(
                    key=TASK_ID_XCOM_KEY,
                    value=result[TASK_ID_XCOM_KEY],
                )
            if FOREACH_CARDINALITY_XCOM_KEY in result:
                return list(range(result[FOREACH_CARDINALITY_XCOM_KEY]))

    return MetaflowKubernetesOperator
class AirflowTask(object):
    """
    Serializable description of a single Airflow task.

    A task is either a Kubernetes pod task (`operator_type == "kubernetes"`,
    optionally a dynamically mapped one) or one of the supported sensors.
    The actual Airflow operator is only instantiated by `to_task`.
    """

    def __init__(
        self,
        name,
        operator_type="kubernetes",
        flow_name=None,
        is_mapper_node=False,
        flow_contains_foreach=False,
    ):
        self.name = name
        self._operator_type = operator_type
        self._operator_args = None
        self._is_mapper_node = is_mapper_node
        self._flow_name = flow_name
        self._flow_contains_foreach = flow_contains_foreach

    @property
    def is_mapper_node(self):
        """Whether this task is dynamically mapped (a foreach body)."""
        return self._is_mapper_node

    def set_operator_args(self, **kwargs):
        """Store the operator keyword arguments; returns `self` for chaining."""
        self._operator_args = kwargs
        return self

    def _make_sensor(self):
        """Instantiate the Airflow sensor matching `operator_type`."""
        sensor_cls = _get_sensor(self._operator_type)
        sensor_kwargs = _parse_sensor_args(self._operator_type, self._operator_args)
        return sensor_cls(task_id=self.name, **sensor_kwargs)

    def to_dict(self):
        """Return the serializable representation of this task."""
        return dict(
            name=self.name,
            is_mapper_node=self._is_mapper_node,
            operator_type=self._operator_type,
            operator_args=self._operator_args,
        )

    @classmethod
    def from_dict(cls, task_dict, flow_name=None, flow_contains_foreach=False):
        """
        Rebuild a task from the output of `to_dict`.

        Missing keys fall back to: no operator args, not a mapper node, and
        the "kubernetes" operator type.
        """
        task = cls(
            task_dict["name"],
            is_mapper_node=task_dict.get("is_mapper_node", False),
            operator_type=task_dict.get("operator_type", "kubernetes"),
            flow_name=flow_name,
            flow_contains_foreach=flow_contains_foreach,
        )
        return task.set_operator_args(**task_dict.get("operator_args", {}))

    def _kubernetes_task(self):
        """Instantiate a regular (non-mapped) Kubernetes operator."""
        operator_cls = get_metaflow_kubernetes_operator()
        pod_args = _kubernetes_pod_operator_args(self._operator_args)
        return operator_cls(
            flow_name=self._flow_name,
            flow_contains_foreach=self._flow_contains_foreach,
            **pod_args
        )

    def _kubernetes_mapper_task(self):
        """Create the partial operator that is later `expand`-ed for a foreach."""
        operator_cls = get_metaflow_kubernetes_operator()
        pod_args = _kubernetes_pod_operator_args(self._operator_args)
        return operator_cls.partial(
            flow_name=self._flow_name,
            flow_contains_foreach=self._flow_contains_foreach,
            **pod_args
        )

    def to_task(self):
        """
        Create the Airflow task object for this description.

        Returns `None` when the operator type is neither "kubernetes" nor a
        supported sensor.
        """
        if self._operator_type == "kubernetes":
            if self.is_mapper_node:
                return self._kubernetes_mapper_task()
            return self._kubernetes_task()
        if self._operator_type in SensorNames.get_supported_sensors():
            return self._make_sensor()
        return None
class Workflow(object):
    """
    Serializable description of an Airflow DAG generated from a flow.

    The object stores the DAG instantiation arguments, the task descriptions
    (`states`), the flow parameters and the nested `graph_structure`. It can
    be round-tripped through `to_dict`/`from_dict` (or JSON) and turned into
    an Airflow `DAG` with `compile`.
    """

    def __init__(self, file_path=None, graph_structure=None, metadata=None, **kwargs):
        """
        Parameters
        ----------
        file_path : str, optional
            When not None, assigned to `dag.fileloc` in `compile`.
        graph_structure : list, optional
            Nested list of task names that `compile` traverses.
        metadata : dict, optional
            Flow metadata; `compile` reads `contains_foreach` and `from_dict`
            reads `flow_name` from it.
        **kwargs
            Forwarded to `AirflowDAGArgs`.
        """
        self._dag_instantiation_params = AirflowDAGArgs(**kwargs)
        self._file_path = file_path
        self._metadata = metadata
        tree = lambda: defaultdict(tree)
        self.states = tree()
        self.metaflow_params = None
        self.graph_structure = graph_structure

    def set_parameters(self, params):
        """Store the list of parameter dicts used by `_construct_params`."""
        self.metaflow_params = params

    def add_state(self, state):
        """Register a task description under its name."""
        self.states[state.name] = state

    def to_dict(self):
        """Return the serializable representation consumed by `from_dict`."""
        return dict(
            metadata=self._metadata,
            graph_structure=self.graph_structure,
            states={s: v.to_dict() for s, v in self.states.items()},
            dag_instantiation_params=self._dag_instantiation_params.serialize(),
            file_path=self._file_path,
            metaflow_params=self.metaflow_params,
        )

    def to_json(self):
        """Return `to_dict()` encoded as a JSON string."""
        return json.dumps(self.to_dict())

    @classmethod
    def from_dict(cls, data_dict):
        """Rebuild a `Workflow` from the output of `to_dict`."""
        metadata = data_dict["metadata"]
        re_cls = cls(
            file_path=data_dict["file_path"],
            graph_structure=data_dict["graph_structure"],
            metadata=metadata,
        )
        re_cls._dag_instantiation_params = AirflowDAGArgs.deserialize(
            data_dict["dag_instantiation_params"]
        )
        # `AirflowTask.to_dict` does not serialize `flow_contains_foreach`, so
        # it has to be restored from the workflow metadata. Without this every
        # rebuilt task would be created with `flow_contains_foreach=False`.
        contains_foreach = bool(metadata.get("contains_foreach", False))
        for sd in data_dict["states"].values():
            re_cls.add_state(
                AirflowTask.from_dict(
                    sd,
                    flow_name=metadata["flow_name"],
                    flow_contains_foreach=contains_foreach,
                )
            )
        re_cls.set_parameters(data_dict["metaflow_params"])
        return re_cls

    @classmethod
    def from_json(cls, json_string):
        """Rebuild a `Workflow` from the output of `to_json`."""
        data = json.loads(json_string)
        return cls.from_dict(data)

    def _construct_params(self):
        """
        Build the `params` mapping passed to the Airflow `DAG`.

        Each entry of `metaflow_params` is a dict whose `name` key becomes the
        mapping key; the remaining keys are passed to `Param`. The stored
        dicts are left untouched so this method (and therefore `compile` and
        `to_dict`) can be called repeatedly.
        """
        from airflow.models.param import Param

        if self.metaflow_params is None:
            return {}
        param_dict = {}
        for p in self.metaflow_params:
            # Copy instead of `del p["name"]`: mutating the stored dict would
            # make a second call fail with a KeyError.
            param_kwargs = {k: v for k, v in p.items() if k != "name"}
            param_dict[p["name"]] = Param(**param_kwargs)
        return param_dict

    def compile(self):
        """
        Create the Airflow `DAG` described by this workflow.

        Validates the Airflow version (and, for flows containing a foreach,
        dynamic task mapping and Kubernetes provider compatibility), then
        instantiates every task inside the DAG context and wires them
        according to `graph_structure`.
        """
        from airflow import DAG

        # Airflow 2.0.0 cannot import this, so we have to do it this way.
        # `XComArg` is needed for dynamic task mapping and if the airflow installation is of the right
        # version (+2.3.0) then the class will be importable.
        XComArg = get_xcom_arg_class()

        _validate_minimum_airflow_version()

        if self._metadata["contains_foreach"]:
            _validate_dynamic_mapping_compatibility()
            # We need to verify if KubernetesPodOperator is of version > 4.2.0 to support foreachs / dynamic task mapping.
            # If the dag uses dynamic Task mapping then we throw an error since the `resources` argument in the `KubernetesPodOperator`
            # doesn't work for dynamic task mapping for `KubernetesPodOperator` version < 4.2.0.
            # For more context check this issue : https://github.com/apache/airflow/issues/24669
            _check_foreach_compatible_kubernetes_provider()

        params_dict = self._construct_params()
        # DAG Params can be seen here :
        # https://airflow.apache.org/docs/apache-airflow/2.0.0/_api/airflow/models/dag/index.html#airflow.models.dag.DAG
        # Airflow 2.0.0 Allows setting Params.
        dag = DAG(params=params_dict, **self._dag_instantiation_params.arguments)
        dag.fileloc = self._file_path if self._file_path is not None else dag.fileloc

        def add_node(node, parents, dag):
            """
            A recursive function to traverse the specialized
            graph_structure datastructure.

            Returns the list of tasks that act as parents for whatever
            follows `node`.
            """
            if type(node) == str:
                task = self.states[node].to_task()
                if parents:
                    for parent in parents:
                        # Handle foreach nodes.
                        if self.states[node].is_mapper_node:
                            task = task.expand(mapper_arr=XComArg(parent))
                        parent >> task
                return [task]  # Return Parent

            # this means a split from parent
            if type(node) == list:
                # this means branching since everything within the list is a list
                if all(isinstance(n, list) for n in node):
                    curr_parents = parents
                    parent_list = []
                    for node_list in node:
                        last_parent = add_node(node_list, curr_parents, dag)
                        parent_list.extend(last_parent)
                    return parent_list
                else:
                    # this means no branching and everything within the list is not a list and can be actual nodes.
                    curr_parents = parents
                    for node_x in node:
                        curr_parents = add_node(node_x, curr_parents, dag)
                    return curr_parents

        with dag:
            parent = None
            for node in self.graph_structure:
                parent = add_node(node, parent, dag)

        return dag
================================================
FILE: metaflow/plugins/airflow/dag.py
================================================
# Deployed on {{deployed_on}}
CONFIG = {{{config}}}
{{{utils}}}
dag = Workflow.from_dict(CONFIG).compile()
with dag:
pass
================================================
FILE: metaflow/plugins/airflow/exception.py
================================================
from metaflow.exception import MetaflowException
class AirflowException(MetaflowException):
    """`MetaflowException` subclass carrying the "Airflow Exception" headline."""

    headline = "Airflow Exception"

    def __init__(self, msg):
        # The message is mandatory here; it is passed through unchanged.
        super().__init__(msg)
class NotSupportedException(MetaflowException):
    """`MetaflowException` subclass for features not yet supported with Airflow."""

    headline = "Not yet supported with Airflow"
================================================
FILE: metaflow/plugins/airflow/plumbing/__init__.py
================================================
================================================
FILE: metaflow/plugins/airflow/plumbing/set_parameters.py
================================================
import os
import json
import sys
def export_parameters(output_file):
    """
    Write the flow parameters as shell `export` statements.

    The parameters are read as a JSON object from the `METAFLOW_PARAMETERS`
    environment variable (an empty object when unset). One line of the form
    `export METAFLOW_INIT_<NAME>=<json value>` is written per parameter, and
    the file mode is then set to 0o775.
    """
    params = json.loads(os.environ.get("METAFLOW_PARAMETERS", "{}"))
    with open(output_file, "w") as f:
        for name in params:
            # Replace `-` with `_` in parameter names since `-` isn't an
            # allowed character for environment variables. cli.py will
            # correctly translate the replaced `-`s.
            env_name = name.upper().replace("-", "_")
            env_value = json.dumps(params[name])
            f.write("export METAFLOW_INIT_%s=%s\n" % (env_name, env_value))
    os.chmod(output_file, 0o775)
if __name__ == "__main__":
export_parameters(sys.argv[1])
================================================
FILE: metaflow/plugins/airflow/sensors/__init__.py
================================================
from .external_task_sensor import ExternalTaskSensorDecorator
from .s3_sensor import S3KeySensorDecorator
SUPPORTED_SENSORS = [
ExternalTaskSensorDecorator,
S3KeySensorDecorator,
]
================================================
FILE: metaflow/plugins/airflow/sensors/base_sensor.py
================================================
import uuid
from metaflow.decorators import FlowDecorator, flow_decorators
from ..exception import AirflowException
from ..airflow_utils import AirflowTask, id_creator, TASK_ID_HASH_LEN
class AirflowSensorDecorator(FlowDecorator):
    """
    Base class for all Airflow sensor decorators.

    Subclasses set `operator_type` and extend `defaults` with the arguments
    of the concrete Airflow sensor.
    """

    allow_multiple = True
    defaults = {
        "timeout": 3600,
        "poke_interval": 60,
        "mode": "reschedule",
        "exponential_backoff": True,
        "pool": None,
        "soft_fail": False,
        "name": None,
        "description": None,
    }
    operator_type = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Unique id used by `validate` to find this decorator's position
        # among all sensor decorators attached to the flow.
        self._id = str(uuid.uuid4())
        self._airflow_task_name = None

    def serialize_operator_args(self):
        """
        Turn the decorator attributes into Airflow task arguments.

        `name` is dropped, a non-None `description` is exposed as `doc`, and
        `do_xcom_push` is always enabled. Subclasses extend this to make
        their own arguments serializable.
        """
        task_args = dict(**self.attributes)
        task_args.pop("name")
        description = task_args.pop("description")
        if description is not None:
            task_args["doc"] = description
        task_args["do_xcom_push"] = True
        return task_args

    def create_task(self):
        """Build the `AirflowTask` for this sensor, omitting None-valued args."""
        provided_args = {
            key: value
            for key, value in self.serialize_operator_args().items()
            if value is not None
        }
        task = AirflowTask(self._airflow_task_name, operator_type=self.operator_type)
        return task.set_operator_args(**provided_args)

    def validate(self, flow):
        """
        Validate if the arguments for the sensor are correct.

        The base implementation only settles the Airflow task name: the
        user-supplied `name` when given, otherwise an auto-generated one.
        """
        explicit_name = self.attributes["name"]
        if explicit_name is not None:
            self._airflow_task_name = explicit_name
            return

        # No name was set, so auto-generate one. The position of this
        # decorator among all sensor decorators of the flow is part of the
        # name because there can be more than one sensor of the same type.
        sensor_ids = [
            d._id
            for d in flow_decorators(flow)
            if issubclass(d.__class__, AirflowSensorDecorator)
        ]
        deco_index = sensor_ids.index(self._id)
        suffix = id_creator([self.operator_type, str(deco_index)], TASK_ID_HASH_LEN)
        self._airflow_task_name = "%s-%s" % (self.operator_type, suffix)

    def flow_init(
        self, flow, graph, environment, flow_datastore, metadata, logger, echo, options
    ):
        """Flow initialization hook; only `flow` is used, to run `validate`."""
        self.validate(flow)
================================================
FILE: metaflow/plugins/airflow/sensors/external_task_sensor.py
================================================
from .base_sensor import AirflowSensorDecorator
from ..airflow_utils import SensorNames
from ..exception import AirflowException
from datetime import timedelta
AIRFLOW_STATES = dict(
QUEUED="queued",
RUNNING="running",
SUCCESS="success",
SHUTDOWN="shutdown", # External request to shut down,
FAILED="failed",
UP_FOR_RETRY="up_for_retry",
UP_FOR_RESCHEDULE="up_for_reschedule",
UPSTREAM_FAILED="upstream_failed",
SKIPPED="skipped",
)
class ExternalTaskSensorDecorator(AirflowSensorDecorator):
    """
    The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
    This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.

    Parameters
    ----------
    timeout : int
        Time, in seconds before the task times out and fails. (Default: 3600)
    poke_interval : int
        Time in seconds that the job should wait in between each try. (Default: 60)
    mode : str
        How the sensor operates. Options are: { poke | reschedule }. (Default: "reschedule")
    exponential_backoff : bool
        allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
    pool : str
        the slot pool this task should run in,
        slot pools are a way to limit concurrency for certain tasks. (Default:None)
    soft_fail : bool
        Set to true to mark the task as SKIPPED on failure. (Default: False)
    name : str
        Name of the sensor on Airflow
    description : str
        Description of sensor in the Airflow UI
    external_dag_id : str
        The dag_id that contains the task you want to wait for.
    external_task_ids : List[str]
        The list of task_ids that you want to wait for.
        If None (default value) the sensor waits for the DAG. (Default: None)
    allowed_states : List[str]
        Iterable of allowed states, (Default: ['success'])
        A single state given as a string is accepted and wrapped in a list.
    failed_states : List[str]
        Iterable of failed or dis-allowed states. (Default: None)
    execution_delta : datetime.timedelta
        time difference with the previous execution to look at,
        the default is the same logical date as the current task or DAG. (Default: None)
    check_existence: bool
        Set to True to check if the external task exists or check if
        the DAG to wait for exists. (Default: True)
    """

    operator_type = SensorNames.EXTERNAL_TASK_SENSOR
    # Docs:
    # https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor
    name = "airflow_external_task_sensor"
    defaults = dict(
        **AirflowSensorDecorator.defaults,
        external_dag_id=None,
        external_task_ids=None,
        allowed_states=[AIRFLOW_STATES["SUCCESS"]],
        failed_states=None,
        execution_delta=None,
        check_existence=True,
        # We cannot add `execution_date_fn` as it requires a python callable.
        # Passing around a python callable is non-trivial since we are passing a
        # callable from metaflow-code to airflow python script. Since we cannot
        # transfer dependencies of the callable, we cannot guarantee that the callable
        # behave exactly as the user expects
    )

    def serialize_operator_args(self):
        """
        Extend the base serialization by turning a `timedelta`
        `execution_delta` into a `{"seconds": ...}` dict.
        """
        task_args = super().serialize_operator_args()
        if task_args["execution_delta"] is not None:
            task_args["execution_delta"] = dict(
                seconds=task_args["execution_delta"].total_seconds()
            )
        return task_args

    def validate(self, flow):
        """
        Validate the decorator arguments and normalize `allowed_states`.

        Raises
        ------
        AirflowException
            If `external_dag_id` is missing, `allowed_states` contains a value
            that is not in `AIRFLOW_STATES`, or `execution_delta` is not a
            `datetime.timedelta`.
        """
        if self.attributes["external_dag_id"] is None:
            raise AirflowException(
                "`%s` argument of `@%s`cannot be `None`."
                % ("external_dag_id", self.name)
            )

        valid_states = list(AIRFLOW_STATES.values())
        allowed_states = self.attributes["allowed_states"]
        if type(allowed_states) == str:
            if allowed_states not in valid_states:
                raise AirflowException(
                    "`%s` is an invalid input of `%s` for `@%s`. Accepted values are %s"
                    % (
                        str(allowed_states),
                        "allowed_states",
                        self.name,
                        ", ".join(valid_states),
                    )
                )
            # The sensor expects an iterable of states. A bare string would be
            # iterated character by character, so wrap a single state in a
            # list.
            self.attributes["allowed_states"] = [allowed_states]
        elif type(allowed_states) == list:
            enum_not_matched = [x for x in allowed_states if x not in valid_states]
            if len(enum_not_matched) > 0:
                raise AirflowException(
                    "`%s` is an invalid input of `%s` for `@%s`. Accepted values are %s"
                    % (
                        str(" OR ".join(["'%s'" % i for i in enum_not_matched])),
                        "allowed_states",
                        self.name,
                        ", ".join(valid_states),
                    )
                )
        else:
            # Any other type falls back to the default.
            self.attributes["allowed_states"] = [AIRFLOW_STATES["SUCCESS"]]

        if self.attributes["execution_delta"] is not None:
            if not isinstance(self.attributes["execution_delta"], timedelta):
                raise AirflowException(
                    "`%s` is an invalid input type of `execution_delta` for `@%s`. Accepted type is `datetime.timedelta`"
                    % (
                        str(type(self.attributes["execution_delta"])),
                        self.name,
                    )
                )
        super().validate(flow)
================================================
FILE: metaflow/plugins/airflow/sensors/s3_sensor.py
================================================
from .base_sensor import AirflowSensorDecorator
from ..airflow_utils import SensorNames
from ..exception import AirflowException
class S3KeySensorDecorator(AirflowSensorDecorator):
    """
    The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
    before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
    and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
    added as a flow decorators. Adding more than one decorator will ensure that `start` step
    starts only after all sensors finish.

    Parameters
    ----------
    timeout : int
        Time, in seconds before the task times out and fails. (Default: 3600)
    poke_interval : int
        Time in seconds that the job should wait in between each try. (Default: 60)
    mode : str
        How the sensor operates. Options are: { poke | reschedule }. (Default: "reschedule")
    exponential_backoff : bool
        allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
    pool : str
        the slot pool this task should run in,
        slot pools are a way to limit concurrency for certain tasks. (Default:None)
    soft_fail : bool
        Set to true to mark the task as SKIPPED on failure. (Default: False)
    name : str
        Name of the sensor on Airflow
    description : str
        Description of sensor in the Airflow UI
    bucket_key : Union[str, List[str]]
        The key(s) being waited on. Supports full s3:// style url or relative path from root level.
        When it's specified as a full s3:// url, please leave `bucket_name` as None
    bucket_name : str
        Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
        When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
    wildcard_match : bool
        whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
    aws_conn_id : str
        a reference to the s3 connection on Airflow. (Default: None)
    verify : bool
        Whether or not to verify SSL certificates for S3 connection. (Default: None)
    """

    name = "airflow_s3_key_sensor"
    operator_type = SensorNames.S3_SENSOR
    # Arg specification can be found here :
    # https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor
    defaults = dict(
        **AirflowSensorDecorator.defaults,
        bucket_key=None,  # Required
        bucket_name=None,
        wildcard_match=False,
        aws_conn_id=None,
        verify=None,  # `verify (Optional[Union[str, bool]])` Whether or not to verify SSL certificates for S3 connection.
        # `verify` is a airflow variable.
    )

    def validate(self, flow):
        """
        Require `bucket_key` to be set, then run the base-class validation.

        Raises `AirflowException` when `bucket_key` is None.
        """
        if self.attributes["bucket_key"] is None:
            raise AirflowException(
                "`bucket_key` for `@%s`cannot be empty." % (self.name)
            )
        super().validate(flow)
================================================
FILE: metaflow/plugins/argo/__init__.py
================================================
================================================
FILE: metaflow/plugins/argo/argo_client.py
================================================
import json
from metaflow.metaflow_config import ARGO_EVENTS_SENSOR_NAMESPACE
from metaflow.exception import MetaflowException
from metaflow.plugins.kubernetes.kubernetes_client import KubernetesClient
class ArgoClientException(MetaflowException):
    """Error raised by `ArgoClient` when an API call or a precondition check fails."""

    headline = "Argo Client error"
class ArgoResourceNotFound(MetaflowException):
    """`MetaflowException` subclass carrying the "Resource not found" headline."""

    headline = "Resource not found"
class ArgoNotPermitted(MetaflowException):
    """`MetaflowException` subclass carrying the "Operation not permitted" headline."""

    headline = "Operation not permitted"
class ArgoClient(object):
def __init__(self, namespace=None):
    """
    Create a client for Argo custom resources.

    A falsy `namespace` (None or an empty string) selects "default".
    """
    self._client = KubernetesClient()
    # API group and version of the Argo custom resources.
    self._group = "argoproj.io"
    self._version = "v1alpha1"
    self._namespace = namespace if namespace else "default"
def get_workflow(self, name):
    """
    Fetch the workflow called `name` from the client's namespace.

    Returns the workflow object, or `None` when the API answers 404.
    Any other API error is raised as `ArgoClientException`.
    """
    client = self._client.get()
    try:
        return client.CustomObjectsApi().get_namespaced_custom_object(
            group=self._group,
            version=self._version,
            namespace=self._namespace,
            plural="workflows",
            name=name,
        )
    except client.rest.ApiException as e:
        if e.status == 404:
            return None
        if e.body is not None:
            message = json.loads(e.body)["message"]
        else:
            message = e.reason
        raise ArgoClientException(message)
def get_workflow_template(self, name):
    """
    Fetch the workflow template called `name` from the client's namespace.

    Returns the template object, or `None` when the API answers 404.
    Any other API error is raised as `ArgoClientException`.
    """
    client = self._client.get()
    try:
        template = client.CustomObjectsApi().get_namespaced_custom_object(
            group=self._group,
            version=self._version,
            namespace=self._namespace,
            plural="workflowtemplates",
            name=name,
        )
    except client.rest.ApiException as e:
        if e.status == 404:
            return None
        if e.body is not None:
            message = json.loads(e.body)["message"]
        else:
            message = e.reason
        raise ArgoClientException(message)
    return template
def get_workflow_templates(self, page_size=100):
    """
    Lazily iterate over the workflow templates in the client's namespace.

    This is a generator: templates are listed `page_size` at a time and
    yielded one by one, following the `continue` token of each response
    until none is returned.

    Error handling:
    - a 404 ends the iteration,
    - a 410 whose body reason is "Expired" restarts from the continue token
      supplied in the error body, when there is one,
    - anything else is raised as `ArgoClientException`.
    """
    client = self._client.get()
    continue_token = None
    while True:
        try:
            params = {"limit": page_size}
            if continue_token:
                params["_continue"] = continue_token
            response = client.CustomObjectsApi().list_namespaced_custom_object(
                group=self._group,
                version=self._version,
                namespace=self._namespace,
                plural="workflowtemplates",
                **params,
            )
            for item in response.get("items", []):
                yield item
            metadata = response.get("metadata", {})
            continue_token = metadata.get("continue")
            # No token means this was the last page.
            if not continue_token:
                break
        except client.rest.ApiException as e:
            error_body = json.loads(e.body) if e.body else {}
            error_message = error_body.get("message", e.reason)
            if e.status == 404:
                # Inside a generator this simply stops the iteration.
                return None
            elif e.status == 410 and error_body.get("reason") == "Expired":
                # The continue token expired; resume with the fresh token
                # from the error body if one was provided.
                new_token = error_body.get("metadata", {}).get("continue")
                if new_token:
                    continue_token = new_token
                    continue
            raise ArgoClientException(error_message)
def register_workflow_template(self, name, workflow_template):
    """
    Create the workflow template `name`, or replace it when it already exists.

    The current `resourceVersion` of an existing template is copied into
    `workflow_template["metadata"]` (the argument is modified in place)
    before the replace call. Returns the API response of the create or
    replace call. API errors are raised as `ArgoClientException`.
    """
    # Unfortunately, Kubernetes client does not handle optimistic
    # concurrency control by itself unlike kubectl
    client = self._client.get()
    try:
        workflow_template["metadata"][
            "resourceVersion"
        ] = client.CustomObjectsApi().get_namespaced_custom_object(
            group=self._group,
            version=self._version,
            namespace=self._namespace,
            plural="workflowtemplates",
            name=name,
        )[
            "metadata"
        ][
            "resourceVersion"
        ]
    except client.rest.ApiException as e:
        if e.status == 404:
            # The template does not exist yet: create it instead of replacing.
            try:
                return client.CustomObjectsApi().create_namespaced_custom_object(
                    group=self._group,
                    version=self._version,
                    namespace=self._namespace,
                    plural="workflowtemplates",
                    body=workflow_template,
                )
            except client.rest.ApiException as e:
                raise ArgoClientException(
                    json.loads(e.body)["message"]
                    if e.body is not None
                    else e.reason
                )
        else:
            raise ArgoClientException(
                json.loads(e.body)["message"] if e.body is not None else e.reason
            )
    # The template exists: replace it using the resourceVersion read above.
    try:
        return client.CustomObjectsApi().replace_namespaced_custom_object(
            group=self._group,
            version=self._version,
            namespace=self._namespace,
            plural="workflowtemplates",
            body=workflow_template,
            name=name,
        )
    except client.rest.ApiException as e:
        raise ArgoClientException(
            json.loads(e.body)["message"] if e.body is not None else e.reason
        )
def delete_cronworkflow(self, name):
    """
    Delete the cron workflow called `name`.

    Returns the API response on success, or `None` when the resource was
    not found (404). Other API errors are converted with `wrap_api_error`
    and raised.
    """
    client = self._client.get()
    try:
        return client.CustomObjectsApi().delete_namespaced_custom_object(
            group=self._group,
            version=self._version,
            namespace=self._namespace,
            plural="cronworkflows",
            name=name,
        )
    except client.rest.ApiException as e:
        if e.status != 404:
            raise wrap_api_error(e)
        return None
def delete_workflow_template(self, name):
    """
    Delete the workflow template called `name`.

    Returns the API response on success, or `None` when the resource was
    not found (404). Other API errors are converted with `wrap_api_error`
    and raised.
    """
    client = self._client.get()
    try:
        return client.CustomObjectsApi().delete_namespaced_custom_object(
            group=self._group,
            version=self._version,
            namespace=self._namespace,
            plural="workflowtemplates",
            name=name,
        )
    except client.rest.ApiException as e:
        if e.status != 404:
            raise wrap_api_error(e)
        return None
def terminate_workflow(self, name):
    """
    Terminate the running workflow called `name`.

    The workflow is fetched, checked, and then patched with
    `spec.shutdown = "Terminate"`. Returns the API response of the patch.

    Raises
    ------
    ArgoClientException
        If the workflow cannot be fetched, has already finished, has
        already been terminated, or the patch call fails.
    """
    client = self._client.get()
    try:
        workflow = client.CustomObjectsApi().get_namespaced_custom_object(
            group=self._group,
            version=self._version,
            namespace=self._namespace,
            plural="workflows",
            name=name,
        )
    except client.rest.ApiException as e:
        raise ArgoClientException(
            json.loads(e.body)["message"] if e.body is not None else e.reason
        )
    # A workflow may not carry a `status` (or `finishedAt`) yet; treat a
    # missing value as "not finished" instead of failing with a KeyError.
    status = workflow.get("status") or {}
    if status.get("finishedAt") is not None:
        raise ArgoClientException(
            "Cannot terminate an execution that has already finished."
        )
    if workflow["spec"].get("shutdown") == "Terminate":
        raise ArgoClientException("Execution has already been terminated.")
    try:
        body = {"spec": workflow["spec"]}
        body["spec"]["shutdown"] = "Terminate"
        return client.CustomObjectsApi().patch_namespaced_custom_object(
            group=self._group,
            version=self._version,
            namespace=self._namespace,
            plural="workflows",
            name=name,
            body=body,
        )
    except client.rest.ApiException as e:
        raise ArgoClientException(
            json.loads(e.body)["message"] if e.body is not None else e.reason
        )
def suspend_workflow(self, name):
    """
    Suspend the running workflow called `name`.

    Patches the workflow with `spec.suspend = True` and returns the result
    of `_patch_workflow`.

    Raises
    ------
    ArgoClientException
        If the workflow does not exist, has already finished, or is
        already suspended.
    """
    workflow = self.get_workflow(name)
    if workflow is None:
        raise ArgoClientException("Execution argo-%s was not found" % name)
    # A workflow may not carry a `status` (or `finishedAt`) yet; treat a
    # missing value as "not finished" instead of failing with a KeyError.
    status = workflow.get("status") or {}
    if status.get("finishedAt") is not None:
        raise ArgoClientException(
            "Cannot suspend an execution that has already finished."
        )
    if workflow["spec"].get("suspend") is True:
        raise ArgoClientException("Execution has already been suspended.")
    body = {"spec": workflow["spec"]}
    body["spec"]["suspend"] = True
    return self._patch_workflow(name, body)
def unsuspend_workflow(self, name):
    """
    Resume the suspended workflow called `name`.

    Patches the workflow with `spec.suspend = False` and returns the result
    of `_patch_workflow`.

    Raises
    ------
    ArgoClientException
        If the workflow does not exist, has already finished, or is not
        currently suspended.
    """
    workflow = self.get_workflow(name)
    if workflow is None:
        raise ArgoClientException("Execution argo-%s was not found" % name)
    # A workflow may not carry a `status` (or `finishedAt`) yet; treat a
    # missing value as "not finished" instead of failing with a KeyError.
    status = workflow.get("status") or {}
    if status.get("finishedAt") is not None:
        raise ArgoClientException(
            "Cannot unsuspend an execution that has already finished."
        )
    if not workflow["spec"].get("suspend", False):
        raise ArgoClientException("Execution is already proceeding.")
    body = {"spec": workflow["spec"]}
    body["spec"]["suspend"] = False
    return self._patch_workflow(name, body)
def _patch_workflow(self, name, body):
client = self._client.get()
try:
return client.CustomObjectsApi().patch_namespaced_custom_object(
group=self._group,
version=self._version,
namespace=self._namespace,
plural="workflows",
name=name,
body=body,
)
except client.rest.ApiException as e:
raise ArgoClientException(
json.loads(e.body)["message"] if e.body is not None else e.reason
)
def trigger_workflow_template(self, name, usertype, username, parameters={}):
    """
    Start a new workflow from the workflow template called `name`.

    The triggering user is recorded in the `metaflow/triggered_by_user`
    annotation and every parameter value is passed JSON-encoded. Returns
    the API response; API errors are raised as `ArgoClientException`.
    """
    client = self._client.get()
    triggered_by = json.dumps({"type": usertype, "name": username})
    workflow_parameters = [
        {"name": key, "value": json.dumps(value)}
        for key, value in parameters.items()
    ]
    body = {
        "apiVersion": "argoproj.io/v1alpha1",
        "kind": "Workflow",
        "metadata": {
            "generateName": name + "-",
            "annotations": {"metaflow/triggered_by_user": triggered_by},
        },
        "spec": {
            "workflowTemplateRef": {"name": name},
            "arguments": {"parameters": workflow_parameters},
        },
    }
    try:
        return client.CustomObjectsApi().create_namespaced_custom_object(
            group=self._group,
            version=self._version,
            namespace=self._namespace,
            plural="workflows",
            body=body,
        )
    except client.rest.ApiException as e:
        if e.body is not None:
            message = json.loads(e.body)["message"]
        else:
            message = e.reason
        raise ArgoClientException(message)
def schedule_workflow_template(self, name, schedule=None, timezone=None):
    """
    Create or replace the CronWorkflow `name` for the workflow template of
    the same name.

    With `schedule=None` the CronWorkflow is written in suspended state; if
    no CronWorkflow exists yet in that case, nothing is created and None is
    returned. Otherwise returns the response of the create/replace call.

    Raises ArgoClientException for any `ApiException` other than the 404
    seen while looking up the existing CronWorkflow.
    """

    def _client_error(api_error):
        # Prefer the `message` of the JSON error body; fall back to `reason`.
        if api_error.body is not None:
            return ArgoClientException(json.loads(api_error.body)["message"])
        return ArgoClientException(api_error.reason)

    kube = self._client.get()
    manifest = {
        "apiVersion": "argoproj.io/v1alpha1",
        "kind": "CronWorkflow",
        "metadata": {"name": name},
        "spec": {
            "suspend": schedule is None,
            "schedule": schedule,
            "timezone": timezone,
            # Argo's own history-limit defaults are 1 (failed) and
            # 3 (successful) according to the original author.
            "failedJobsHistoryLimit": 10000,
            "successfulJobsHistoryLimit": 10000,
            "workflowSpec": {"workflowTemplateRef": {"name": name}},
            # 59 minutes: a failed cron trigger may still succeed up to 59
            # minutes after its scheduled time.
            "startingDeadlineSeconds": 3540,
        },
    }
    resource = dict(
        group=self._group,
        version=self._version,
        namespace=self._namespace,
        plural="cronworkflows",
    )
    # The Kubernetes client does not do optimistic concurrency control for
    # us (unlike kubectl), so carry over the current resourceVersion.
    try:
        existing = kube.CustomObjectsApi().get_namespaced_custom_object(
            name=name, **resource
        )
        manifest["metadata"]["resourceVersion"] = existing["metadata"][
            "resourceVersion"
        ]
    except kube.rest.ApiException as lookup_error:
        if lookup_error.status != 404:
            raise _client_error(lookup_error)
        # No CronWorkflow yet: only create one if there is a schedule.
        if schedule is None:
            return
        try:
            return kube.CustomObjectsApi().create_namespaced_custom_object(
                body=manifest, **resource
            )
        except kube.rest.ApiException as create_error:
            raise _client_error(create_error)
    try:
        return kube.CustomObjectsApi().replace_namespaced_custom_object(
            body=manifest, name=name, **resource
        )
    except kube.rest.ApiException as replace_error:
        raise _client_error(replace_error)
def register_sensor(
    self, name, sensor=None, sensor_namespace=ARGO_EVENTS_SENSOR_NAMESPACE
):
    """
    Create or replace the Sensor custom object `name` in `sensor_namespace`.

    `sensor` is the sensor manifest as a dict; it is updated in place with
    the `resourceVersion` of an already existing sensor. A missing or empty
    `sensor` is replaced by a manifest that only has an empty `metadata`.

    Returns the response of the create/replace call. Raises
    ArgoClientException for any `ApiException` other than the 404 seen while
    looking up the existing sensor.
    """

    def _client_error(api_error):
        # Prefer the `message` of the JSON error body; fall back to `reason`.
        if api_error.body is not None:
            return ArgoClientException(json.loads(api_error.body)["message"])
        return ArgoClientException(api_error.reason)

    if sensor is None:
        sensor = {}
    kube = self._client.get()
    if not sensor:
        sensor["metadata"] = {}
    resource = dict(
        group=self._group,
        version=self._version,
        namespace=sensor_namespace,
        plural="sensors",
    )
    # The Kubernetes client does not do optimistic concurrency control for
    # us (unlike kubectl), so carry over the current resourceVersion.
    try:
        existing = kube.CustomObjectsApi().get_namespaced_custom_object(
            name=name, **resource
        )
        sensor["metadata"]["resourceVersion"] = existing["metadata"][
            "resourceVersion"
        ]
    except kube.rest.ApiException as lookup_error:
        if lookup_error.status != 404:
            raise _client_error(lookup_error)
        # The sensor does not exist yet, so create it.
        try:
            return kube.CustomObjectsApi().create_namespaced_custom_object(
                body=sensor, **resource
            )
        except kube.rest.ApiException as create_error:
            raise _client_error(create_error)
    try:
        return kube.CustomObjectsApi().replace_namespaced_custom_object(
            body=sensor, name=name, **resource
        )
    except kube.rest.ApiException as replace_error:
        raise _client_error(replace_error)
def delete_sensor(self, name, sensor_namespace):
    """
    Delete the Sensor custom object `name` from `sensor_namespace`.

    Returns the API response on success and None when the sensor does not
    exist (HTTP 404). Any other `ApiException` is converted with
    `wrap_api_error` and raised.
    """
    kube = self._client.get()
    try:
        return kube.CustomObjectsApi().delete_namespaced_custom_object(
            group=self._group,
            version=self._version,
            namespace=sensor_namespace,
            plural="sensors",
            name=name,
        )
    except kube.rest.ApiException as err:
        if err.status != 404:
            raise wrap_api_error(err)
        return None
def wrap_api_error(error):
    """
    Convert a Kubernetes client API error into the matching Argo exception.

    The message is the `message` field of the JSON error body. If there is
    no body, or the body is not a JSON object with a `message` key (for
    example an HTML or plain-text error page), the error's `reason` is used
    instead, so that building the wrapper never hides the original failure
    behind a parsing error.

    Returns (does not raise) one of:
    - ArgoResourceNotFound for status 404,
    - ArgoNotPermitted for status 403,
    - ArgoClientException for everything else.
    """
    message = error.reason
    if error.body is not None:
        try:
            message = json.loads(error.body)["message"]
        except (ValueError, KeyError, TypeError):
            # Unparseable or unexpected body: keep `reason`, or fall back to
            # the raw body when no reason is available.
            if not message:
                message = str(error.body)
    if error.status == 404:
        # usually handled outside this function as most cases want to return None instead.
        return ArgoResourceNotFound(message)
    if error.status == 403:
        return ArgoNotPermitted(message)
    # catch all
    return ArgoClientException(message)
================================================
FILE: metaflow/plugins/argo/argo_events.py
================================================
import json
import os
import sys
import time
import urllib
import uuid
from datetime import datetime
from metaflow.exception import MetaflowException
from metaflow.metaflow_config import (
ARGO_EVENTS_WEBHOOK_AUTH,
ARGO_EVENTS_WEBHOOK_URL,
SERVICE_HEADERS,
SERVICE_RETRY_COUNT,
)
class ArgoEventException(MetaflowException):
    """Error raised by `ArgoEvent` when an event name is invalid or publishing fails."""

    headline = "Argo Event Exception"
class ArgoEvent(object):
    """
    ArgoEvent is a small event, a message, that can be published to Argo Workflows. The
    event will eventually start all flows which have been previously deployed with `@trigger`
    to wait for this particular named event.

    Parameters
    ----------
    name : Union[str, Callable[[], str]]
        Name of the event, or a callable (invoked with no arguments) that returns the event name (e.g., `namespaced_event_name('foo')`).
    url : str, optional
        Override the event endpoint from `ARGO_EVENTS_WEBHOOK_URL`.
    payload : Dict, optional
        A set of key-value pairs delivered in this event. Used to set parameters of triggered flows.
    access_token : str, optional
        If set, it is sent as a bearer token in the `Authorization` header
        of the publish request.
    """

    def __init__(
        self, name, url=ARGO_EVENTS_WEBHOOK_URL, payload=None, access_token=None
    ):
        # TODO: Introduce support for NATS
        if callable(name):
            name = name()
            if not isinstance(name, str):
                raise ArgoEventException(
                    "Callable for 'name' must return a string, got %s"
                    % type(name).__name__
                )
        self._name = name
        self._url = url
        self._payload = payload or {}
        self._access_token = access_token

    def add_to_payload(self, key, value):
        """
        Add a key-value pair in the payload. This is typically used to set parameters
        of triggered flows. Often, `key` is the parameter name you want to set to
        `value`. Overrides any existing value of `key`.

        Parameters
        ----------
        key : str
            Key
        value : str
            Value. It is stored as `str(value)`.

        Returns
        -------
        ArgoEvent
            This event, so calls can be chained.
        """
        self._payload[key] = str(value)
        return self

    def safe_publish(self, payload=None, ignore_errors=True):
        """
        Publishes an event when called inside a deployed workflow. Outside a deployed workflow
        this function does nothing.

        Use this function inside flows to create events safely. As this function is a no-op
        for local runs, you can safely call it during local development without causing unintended
        side-effects. It takes effect only when deployed on Argo Workflows.

        Parameters
        ----------
        payload : dict
            Additional key-value pairs to add to the payload.
        ignore_errors : bool, default True
            If True, events are created on a best effort basis - errors are silently ignored.

        Returns
        -------
        Same as `publish` called with `force=False`.
        """
        return self.publish(payload=payload, force=False, ignore_errors=ignore_errors)

    def publish(self, payload=None, force=True, ignore_errors=True):
        """
        Publishes an event.

        Note that the function returns immediately after the event has been sent. It
        does not wait for flows to start, nor does it guarantee that any flows will start.

        Parameters
        ----------
        payload : dict
            Additional key-value pairs to add to the payload. They override
            keys already present in the event's own payload.
        force : bool, default True
            If False, the event is only sent when the environment variable
            `ARGO_WORKFLOW_TEMPLATE` is set.
        ignore_errors : bool, default True
            If True, events are created on a best effort basis - errors are
            printed to stderr instead of being raised.

        Returns
        -------
        str or None
            The generated event id when the event was sent, otherwise None.

        Raises
        ------
        ArgoEventException
            Only when `ignore_errors` is False: if sending fails, or if the
            event was not sent because `force` is False outside Argo Workflows.
        """
        # `import urllib` alone does not load the `request`/`error`
        # submodules, so import them explicitly where they are used.
        import urllib.error
        import urllib.request
        from datetime import timezone

        if payload is None:
            payload = {}
        # Publish event iff forced or running on Argo Workflows
        if force or os.environ.get("ARGO_WORKFLOW_TEMPLATE"):
            try:
                headers = {}
                if self._access_token:
                    # TODO: Test with bearer tokens
                    headers = {"Authorization": "Bearer {}".format(self._access_token)}
                if ARGO_EVENTS_WEBHOOK_AUTH == "service":
                    headers.update(SERVICE_HEADERS)
                # TODO: do we need to worry about certs?

                # Use urllib to avoid introducing any dependency in Metaflow
                data = {
                    "name": self._name,
                    "payload": {
                        # Add default fields here...
                        "name": self._name,
                        "id": str(uuid.uuid4()),
                        "timestamp": int(time.time()),
                        # Timezone-aware replacement for the deprecated
                        # datetime.utcnow(); the formatted date is the same.
                        "utc_date": datetime.now(timezone.utc).strftime("%Y%m%d"),
                        "generated-by-metaflow": True,
                        **self._payload,
                        **payload,
                    },
                }
                request = urllib.request.Request(
                    self._url,
                    method="POST",
                    headers={"Content-Type": "application/json", **headers},
                    data=json.dumps(data).encode("utf-8"),
                )

                for i in range(SERVICE_RETRY_COUNT):
                    try:
                        # we do not want to wait indefinitely for a response on the event broadcast, as this will keep the task running.
                        urllib.request.urlopen(request, timeout=60)
                        print(
                            "Argo Event (%s) published." % self._name, file=sys.stderr
                        )
                        return data["payload"]["id"]
                    except urllib.error.HTTPError:
                        # TODO: Retry retryable HTTP error codes
                        raise
                    except urllib.error.URLError:
                        # Connection-level failure: back off exponentially
                        # and retry until the attempts are used up.
                        if i == SERVICE_RETRY_COUNT - 1:
                            raise
                        else:
                            time.sleep(2**i)
            except Exception as e:
                msg = "Unable to publish Argo Event (%s): %s" % (self._name, e)
                if ignore_errors:
                    print(msg, file=sys.stderr)
                else:
                    raise ArgoEventException(msg)
        else:
            msg = (
                "Argo Event (%s) was not published. Use "
                + "ArgoEvent(...).publish(...) "
                + "to force publish."
            ) % self._name
            if ignore_errors:
                print(msg, file=sys.stderr)
            else:
                raise ArgoEventException(msg)
================================================
FILE: metaflow/plugins/argo/argo_workflows.py
================================================
import base64
import json
import os
import re
import shlex
import sys
from collections import defaultdict
from hashlib import sha1
from math import inf
from typing import List
from metaflow import JSONType, current
from metaflow.decorators import flow_decorators
from metaflow.exception import MetaflowException
from metaflow.graph import FlowGraph
from metaflow.includefile import FilePathClass
from metaflow.metaflow_config import (
ARGO_EVENTS_EVENT,
ARGO_EVENTS_EVENT_BUS,
ARGO_EVENTS_EVENT_SOURCE,
ARGO_EVENTS_INTERNAL_WEBHOOK_URL,
ARGO_EVENTS_SENSOR_NAMESPACE,
ARGO_EVENTS_SERVICE_ACCOUNT,
ARGO_EVENTS_WEBHOOK_AUTH,
ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT,
ARGO_WORKFLOWS_ENV_VARS_TO_SKIP,
ARGO_WORKFLOWS_KUBERNETES_SECRETS,
ARGO_WORKFLOWS_UI_URL,
AWS_SECRETS_MANAGER_DEFAULT_REGION,
AZURE_KEY_VAULT_PREFIX,
AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
CARD_AZUREROOT,
CARD_GSROOT,
CARD_S3ROOT,
DATASTORE_SYSROOT_AZURE,
DATASTORE_SYSROOT_GS,
DATASTORE_SYSROOT_S3,
DATATOOLS_S3ROOT,
DEFAULT_METADATA,
DEFAULT_SECRETS_BACKEND_TYPE,
GCP_SECRET_MANAGER_PREFIX,
KUBERNETES_FETCH_EC2_METADATA,
KUBERNETES_NAMESPACE,
KUBERNETES_SANDBOX_INIT_SCRIPT,
KUBERNETES_SECRETS,
S3_ENDPOINT_URL,
S3_SERVER_SIDE_ENCRYPTION,
SERVICE_HEADERS,
SERVICE_INTERNAL_URL,
UI_URL,
)
from metaflow.metaflow_config_funcs import config_values
from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
from metaflow.parameters import deploy_time_eval
from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
from metaflow.plugins.kubernetes.kubernetes_jobsets import KubernetesArgoJobSet
from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
from metaflow.user_configs.config_options import ConfigInput
from metaflow.util import (
compress_list,
dict_to_cli_options,
to_bytes,
to_camelcase,
to_unicode,
)
from .argo_client import ArgoClient
from .exit_hooks import ExitHookHack, HttpExitHook, ContainerHook
from metaflow.util import resolve_identity
class ArgoWorkflowsException(MetaflowException):
    """General error for Argo Workflows operations such as deploy, trigger, delete and terminate."""

    headline = "Argo Workflows error"
class ArgoWorkflowsSensorCleanupException(MetaflowException):
    """Raised when cleaning up sensors of a previous deployment fails."""

    headline = "Argo Workflows sensor clean up error"
class ArgoWorkflowsSchedulingException(MetaflowException):
    """Raised when registering the schedule or the sensor of a workflow fails."""

    headline = "Argo Workflows scheduling error"
# List of future enhancements -
# 1. Configure Argo metrics.
# 2. Support resuming failed workflows within Argo Workflows.
# 3. Add Metaflow tags to labels/annotations.
# 4. Support R lang.
# 5. Ping @savin at slack.outerbounds.co for any feature request
class ArgoWorkflows(object):
def __init__(
    self,
    name,
    graph: FlowGraph,
    flow,
    code_package_metadata,
    code_package_sha,
    code_package_url,
    production_token,
    metadata,
    flow_datastore,
    environment,
    event_logger,
    monitor,
    tags=None,
    namespace=None,
    username=None,
    max_workers=None,
    workflow_timeout=None,
    workflow_priority=None,
    auto_emit_argo_events=False,
    notify_on_error=False,
    notify_on_success=False,
    notify_slack_webhook_url=None,
    notify_pager_duty_integration_key=None,
    notify_incident_io_api_key=None,
    incident_io_alert_source_config_id=None,
    incident_io_metadata: List[str] = None,
    enable_heartbeat_daemon=True,
    enable_error_msg_capture=False,
    workflow_title=None,
    workflow_description=None,
):
    """
    Store the deployment settings and eagerly compile the Argo resources.

    All arguments are kept as attributes of the same name, except
    `incident_io_metadata`, which is first converted from a list of
    `key=value` strings into a dict by `parse_incident_io_metadata`.

    After the plain assignments the constructor derives, in this order:
    parameters, config parameters, triggers and their options, the
    schedule and timezone, base labels, base annotations, the workflow
    template and the sensor.
    NOTE(review): the later steps presumably read the results of the
    earlier ones, so keep the statement order intact - confirm before
    reordering.
    """
    # Some high-level notes -
    #
    # Fail-fast behavior for Argo Workflows - Argo stops
    #    scheduling new steps as soon as it detects that one of the DAG nodes
    #    has failed. After waiting for all the scheduled DAG nodes to run till
    #    completion, Argo will fail the DAG. This implies that after a node
    #    has failed, it may be a while before the entire DAG is marked as
    #    failed. There is nothing Metaflow can do here for failing even
    #    faster (as of Argo 3.2).
    #
    # `argo stop` vs `argo terminate` - since we don't currently
    #    rely on any exit handlers, it's safe to either stop or terminate any running
    #    argo workflow deployed through Metaflow. This may not hold true, once we
    #    integrate with Argo Events.
    #
    # Currently, an Argo Workflow can only execute entirely within a single
    #    Kubernetes namespace. Multi-cluster / Multi-namespace execution is on the
    #    deck for v3.4 release for Argo Workflows; beyond which point, we will be
    #    able to support them natively.
    #
    # Since this implementation generates numerous templates on the fly, please
    # ensure that your Argo Workflows controller doesn't restrict
    # templateReferencing.
    self.name = name
    self.graph = graph
    # Called as soon as the graph is stored, before any other attribute is set.
    self._parse_conditional_branches()
    self.flow = flow
    self.code_package_metadata = code_package_metadata
    self.code_package_sha = code_package_sha
    self.code_package_url = code_package_url
    self.production_token = production_token
    self.metadata = metadata
    self.flow_datastore = flow_datastore
    self.environment = environment
    self.event_logger = event_logger
    self.monitor = monitor
    self.tags = tags
    self.namespace = namespace
    self.username = username
    self.max_workers = max_workers
    self.workflow_timeout = workflow_timeout
    self.workflow_priority = workflow_priority
    self.auto_emit_argo_events = auto_emit_argo_events
    self.notify_on_error = notify_on_error
    self.notify_on_success = notify_on_success
    self.notify_slack_webhook_url = notify_slack_webhook_url
    self.notify_pager_duty_integration_key = notify_pager_duty_integration_key
    self.notify_incident_io_api_key = notify_incident_io_api_key
    self.incident_io_alert_source_config_id = incident_io_alert_source_config_id
    # Stored as a dict (or None), not as the raw list of "key=value" strings.
    self.incident_io_metadata = self.parse_incident_io_metadata(
        incident_io_metadata
    )
    self.enable_heartbeat_daemon = enable_heartbeat_daemon
    self.enable_error_msg_capture = enable_error_msg_capture
    self.workflow_title = workflow_title
    self.workflow_description = workflow_description
    # Derived state, computed eagerly at construction time.
    self.parameters = self._process_parameters()
    self.config_parameters = self._process_config_parameters()
    self.triggers, self.trigger_options = self._process_triggers()
    self._schedule, self._timezone = self._get_schedule()
    self._base_labels = self._base_kubernetes_labels()
    self._base_annotations = self._base_kubernetes_annotations()
    self._workflow_template = self._compile_workflow_template()
    self._sensor = self._compile_sensor()
def __str__(self):
return str(self._workflow_template)
def deploy(self):
    """
    Clean up sensors left by a previous deployment, then register the
    compiled workflow template under `self.name`.

    Any failure while registering the template is re-raised as an
    ArgoWorkflowsException.
    """
    self.cleanup_previous_sensors()
    try:
        argo = ArgoClient(namespace=KUBERNETES_NAMESPACE)
        manifest = self._workflow_template.to_json()
        # Register workflow template.
        argo.register_workflow_template(self.name, manifest)
    except Exception as failure:
        raise ArgoWorkflowsException(str(failure))
def cleanup_previous_sensors(self):
    """
    Delete the sensor of an earlier deployment of this workflow if it would
    otherwise be left behind.

    Does nothing when no workflow template named `self.name` exists. Any
    failure is re-raised as an ArgoWorkflowsSensorCleanupException.
    """
    try:
        client = ArgoClient(namespace=KUBERNETES_NAMESPACE)
        # Look for an existing deployment to clean up after.
        previous = client.get_workflow_template(self.name)
        if not previous:
            return None
        annotations = previous["metadata"]["annotations"]
        previous_namespace = annotations.get("metaflow/sensor_namespace")
        if previous_namespace is None:
            # Deployed before sensor annotations existed: a sensor may live
            # in the client's own namespace. Delete it; it gets recreated
            # if it is still needed.
            client.delete_sensor(
                ArgoWorkflows._sensor_name(self.name), client._namespace
            )
            return None
        previous_name = annotations["metaflow/sensor_name"]
        # A sensor that stays in the same namespace is simply replaced
        # later; delete only if there is no new sensor or it moved.
        replaced_in_place = (
            self._sensor and previous_namespace == ARGO_EVENTS_SENSOR_NAMESPACE
        )
        if not replaced_in_place:
            client.delete_sensor(previous_name, previous_namespace)
    except Exception as failure:
        raise ArgoWorkflowsSensorCleanupException(str(failure))
@staticmethod
def _sanitize(name):
# Metaflow allows underscores in node names, which are disallowed in Argo
# Workflow template names - so we swap them with hyphens which are not
# allowed by Metaflow - guaranteeing uniqueness.
return name.replace("_", "-")
@staticmethod
def _sensor_name(name):
# Unfortunately, Argo Events Sensor names don't allow for
# dots (sensors run into an error) which rules out self.name :(
return name.replace(".", "-")
@staticmethod
def list_templates(flow_name, all=False, page_size=100):
    """
    Yield the names of workflow templates in `KUBERNETES_NAMESPACE`.

    With `all=True` every template is yielded; otherwise only templates
    whose `metaflow/flow_name` annotation equals `flow_name`. `page_size`
    is passed through to `get_workflow_templates`.
    """
    client = ArgoClient(namespace=KUBERNETES_NAMESPACE)
    for template in client.get_workflow_templates(page_size=page_size):
        metadata = template["metadata"]
        if not all:
            owning_flow = metadata.get("annotations", {}).get(
                "metaflow/flow_name", None
            )
            if flow_name != owning_flow:
                continue
        yield metadata["name"]
@staticmethod
def delete(name):
    """
    Delete the workflow template `name` together with its schedule and
    sensor.

    Returns a tuple `(schedule_deleted, sensor_deleted, workflow_deleted)`
    holding the results of the three delete calls. Raises
    ArgoWorkflowsException if the workflow template itself does not exist.
    """
    client = ArgoClient(namespace=KUBERNETES_NAMESPACE)
    # The template may be gone already; associated sensors and schedules
    # are still cleaned up in that case.
    template = client.get_workflow_template(name)
    if not template:
        template = {}
    annotations = template.get("metadata", {}).get("annotations", {})
    recorded_sensor = annotations.get("metaflow/sensor_name", name)
    # A missing namespace annotation means the deployment predates custom
    # sensor namespaces.
    sensor_namespace = annotations.get(
        "metaflow/sensor_namespace", KUBERNETES_NAMESPACE
    )
    sensor_name = ArgoWorkflows._sensor_name(recorded_sensor)

    # The schedule is always attempted first and its result is only
    # reported, never checked: there may not have been a schedule, and a
    # leftover schedule is harmless once the workflow is gone.
    schedule_deleted = client.delete_cronworkflow(name)

    # Sensors attached to the workflow consume actual resources, so try to
    # remove them as well.
    sensor_deleted = client.delete_sensor(sensor_name, sensor_namespace)

    # Finally delete the workflow template. A missing template is surfaced
    # to the user for further action.
    workflow_deleted = client.delete_workflow_template(name)
    if workflow_deleted is None:
        raise ArgoWorkflowsException(
            "The workflow *%s* doesn't exist on Argo Workflows." % name
        )
    return schedule_deleted, sensor_deleted, workflow_deleted
@classmethod
def terminate(cls, flow_name, name):
    """
    Terminate the workflow execution `name`.

    Returns True on success. Raises ArgoWorkflowsException when the client
    reports no such execution; `flow_name` is only used in that message.
    """
    outcome = ArgoClient(namespace=KUBERNETES_NAMESPACE).terminate_workflow(name)
    if outcome is not None:
        return True
    raise ArgoWorkflowsException(
        "No execution found for {flow_name}/{run_id} in Argo Workflows.".format(
            flow_name=flow_name, run_id=name
        )
    )
@staticmethod
def get_workflow_status(flow_name, name):
    """
    Return the phase of the workflow execution `name`.

    The value is `status.phase` of the workflow object, or None if that
    field is absent. Raises ArgoWorkflowsException when no execution is
    found; `flow_name` is only used in that message.
    """
    # TODO: Only look for workflows for the specified flow
    execution = ArgoClient(namespace=KUBERNETES_NAMESPACE).get_workflow(name)
    if not execution:
        raise ArgoWorkflowsException(
            "No execution found for {flow_name}/{run_id} in Argo Workflows.".format(
                flow_name=flow_name, run_id=name
            )
        )
    # Only the workflow phase is reported for now.
    return execution.get("status", {}).get("phase")
@staticmethod
def suspend(name):
    """Suspend the workflow execution `name`; returns True if no error was raised."""
    ArgoClient(namespace=KUBERNETES_NAMESPACE).suspend_workflow(name)
    return True
@staticmethod
def unsuspend(name):
    """Resume the suspended workflow execution `name`; returns True if no error was raised."""
    ArgoClient(namespace=KUBERNETES_NAMESPACE).unsuspend_workflow(name)
    return True
@staticmethod
def parse_incident_io_metadata(metadata: List[str] = None):
"parse key value pairs into a dict for incident.io metadata if given"
parsed_metadata = None
if metadata is not None:
parsed_metadata = {}
for kv in metadata:
key, value = kv.split("=", 1)
if key in parsed_metadata:
raise MetaflowException(
"Incident.io Metadata *%s* provided multiple times" % key
)
parsed_metadata[key] = value
return parsed_metadata
@classmethod
def trigger(cls, name, parameters=None):
    """
    Start a new execution of the deployed workflow template `name`.

    `parameters` (default: no parameters) is handed to
    `ArgoClient.trigger_workflow_template` together with the user type and
    user name taken from `resolve_identity()`.

    Raises ArgoWorkflowsException if the template cannot be fetched, does
    not exist, lacks the `metaflow/owner` annotation, or if triggering fails.
    """
    if parameters is None:
        parameters = {}
    try:
        template = ArgoClient(namespace=KUBERNETES_NAMESPACE).get_workflow_template(
            name
        )
    except Exception as lookup_failure:
        raise ArgoWorkflowsException(str(lookup_failure))
    if template is None:
        raise ArgoWorkflowsException(
            "The workflow *%s* doesn't exist on Argo Workflows in namespace *%s*. "
            "Please deploy your flow first." % (name, KUBERNETES_NAMESPACE)
        )
    try:
        # Only templates deployed through Metaflow carry this annotation;
        # the lookup is done purely for its KeyError.
        template["metadata"]["annotations"]["metaflow/owner"]
    except KeyError:
        raise ArgoWorkflowsException(
            "An existing non-metaflow workflow with the same name as "
            "*%s* already exists in Argo Workflows. \nPlease modify the "
            "name of this flow or delete your existing workflow on Argo "
            "Workflows before proceeding." % name
        )
    try:
        # The identity is split on ":" into user type and user name; missing
        # pieces become "unknown".
        identity = resolve_identity().split(":")
        usertype, username = (identity + ["unknown", "unknown"])[:2]
        client = ArgoClient(namespace=KUBERNETES_NAMESPACE)
        return client.trigger_workflow_template(name, usertype, username, parameters)
    except Exception as trigger_failure:
        raise ArgoWorkflowsException(str(trigger_failure))
def _base_kubernetes_labels(self):
"""
Get shared Kubernetes labels for Argo resources.
"""
# TODO: Add configuration through an environment variable or Metaflow config in the future if required.
labels = {"app.kubernetes.io/part-of": "metaflow"}
return labels
def _base_kubernetes_annotations(self):
    """
    Return the Kubernetes annotations shared by all Argo resources.

    Always included: production token, owner, the fixed user
    "argo-workflows", flow name and the current UTC time as deployment
    timestamp. Project annotations are added when `current` has a project
    name. Title and description annotations for the Argo UI are added when
    non-empty.
    """
    from datetime import datetime, timezone

    # TODO: Add configuration through an environment variable or Metaflow config in the future if required.
    deployed_at = datetime.now(timezone.utc).isoformat()
    annotations = {
        "metaflow/production_token": self.production_token,
        "metaflow/owner": self.username,
        "metaflow/user": "argo-workflows",
        "metaflow/flow_name": self.flow.name,
        "metaflow/deployment_timestamp": str(deployed_at),
    }

    if current.get("project_name"):
        annotations["metaflow/project_name"] = current.project_name
        annotations["metaflow/branch_name"] = current.branch_name
        annotations["metaflow/project_flow_name"] = current.project_flow_name

    # Argo Workflows title and description annotations
    # https://argo-workflows.readthedocs.io/en/latest/title-and-description/
    # Title: CLI-provided value, else the project flow name, else the flow name.
    title = None
    if self.workflow_title:
        title = self.workflow_title.strip()
    if not title:
        title = current.get("project_flow_name")
    if not title:
        title = self.flow.name

    # Description: CLI-provided value, else the flow's docstring.
    description = None
    if self.workflow_description:
        description = self.workflow_description.strip()
    if not description:
        flow_doc = self.flow.__doc__
        description = flow_doc.strip() if flow_doc else None

    if title:
        annotations["workflows.argoproj.io/title"] = title
    if description:
        annotations["workflows.argoproj.io/description"] = description
    return annotations
def _get_schedule(self):
schedule = self.flow._flow_decorators.get("schedule")
if schedule:
# Remove the field "Year" if it exists
schedule = schedule[0]
return " ".join(schedule.schedule.split()[:5]), schedule.timezone
return None, None
def schedule(self):
    """
    Register the cron schedule and, if one was compiled, the sensor of this
    workflow.

    Any failure is re-raised as an ArgoWorkflowsSchedulingException.
    """
    try:
        workflows_client = ArgoClient(namespace=KUBERNETES_NAMESPACE)
        workflows_client.schedule_workflow_template(
            self.name, self._schedule, self._timezone
        )
        sensor_name = ArgoWorkflows._sensor_name(self.name)
        if not self._sensor:
            return
        # Register sensor. Metaflow will overwrite any existing sensor.
        # The new sensor goes into the configured sensor namespace.
        events_client = ArgoClient(namespace=ARGO_EVENTS_SENSOR_NAMESPACE)
        events_client.register_sensor(
            sensor_name, self._sensor.to_json(), ARGO_EVENTS_SENSOR_NAMESPACE
        )
    except Exception as failure:
        raise ArgoWorkflowsSchedulingException(str(failure))
def trigger_explanation(self):
# Trigger explanation for cron workflows
if self.flow._flow_decorators.get("schedule"):
return (
"This workflow triggers automatically via the CronWorkflow *%s*."
% self.name
)
# Trigger explanation for @trigger
elif self.flow._flow_decorators.get("trigger"):
return (
"This workflow triggers automatically when the upstream %s "
"is/are published."
% self.list_to_prose(
[event["name"] for event in self.triggers], "event"
)
)
# Trigger explanation for @trigger_on_finish
elif self.flow._flow_decorators.get("trigger_on_finish"):
return (
"This workflow triggers automatically when the upstream %s succeed(s)"
% self.list_to_prose(
[
# Truncate prefix `metaflow.` and suffix `.end` from event name
event["name"][len("metaflow.") : -len(".end")]
for event in self.triggers
],
"flow",
)
)
else:
return "No triggers defined. You need to launch this workflow manually."
@classmethod
def get_existing_deployment(cls, name):
    """
    Return `(owner, production_token)` of the deployed workflow template
    `name`, or None if no such template exists.

    Raises ArgoWorkflowsException when a template exists but lacks the
    Metaflow annotations, i.e. it was not deployed through Metaflow.
    """
    template = ArgoClient(namespace=KUBERNETES_NAMESPACE).get_workflow_template(
        name
    )
    if template is None:
        return None
    try:
        annotations = template["metadata"]["annotations"]
        owner = annotations["metaflow/owner"]
        token = annotations["metaflow/production_token"]
        return (owner, token)
    except KeyError:
        raise ArgoWorkflowsException(
            "An existing non-metaflow workflow with the same name as "
            "*%s* already exists in Argo Workflows. \nPlease modify the "
            "name of this flow or delete your existing workflow on Argo "
            "Workflows before proceeding." % name
        )
@classmethod
def get_execution(cls, name):
    """
    Return `(owner, production_token, flow_name, branch_name, project_name)`
    read from the annotations of the workflow execution `name`, or None if
    no such execution exists.

    `branch_name` and `project_name` are None when not annotated. Raises
    ArgoWorkflowsException when one of the other annotations is missing.
    """
    execution = ArgoClient(namespace=KUBERNETES_NAMESPACE).get_workflow(name)
    if execution is None:
        return None
    try:
        annotations = execution["metadata"]["annotations"]
        owner = annotations["metaflow/owner"]
        token = annotations["metaflow/production_token"]
        flow_name = annotations["metaflow/flow_name"]
        branch_name = annotations.get("metaflow/branch_name", None)
        project_name = annotations.get("metaflow/project_name", None)
        return (owner, token, flow_name, branch_name, project_name)
    except KeyError:
        raise ArgoWorkflowsException(
            "A non-metaflow workflow *%s* already exists in Argo Workflows."
            % name
        )
def _process_parameters(self):
    """
    Describe the flow's non-config parameters for Argo Workflows.

    Returns a dict keyed by parameter name. Each entry holds the Python
    variable name, the parameter name, the JSON-encoded default value, the
    type name, the help text and the required flag; file-path parameters
    additionally carry `is_text` and `encoding`.

    Raises MetaflowException if two parameters share a name
    (case-insensitively, config parameters included), or if the flow has a
    schedule and a required parameter has no default.
    """
    described = {}
    scheduled = self.flow._flow_decorators.get("schedule") is not None
    names_seen = set()
    for var, param in self.flow._get_parameters():
        # Reject parameters that are specified twice.
        lowered = param.name.lower()
        if lowered in names_seen:
            raise MetaflowException(
                "Parameter *%s* is specified twice. "
                "Note that parameter names are "
                "case-insensitive." % param.name
            )
        names_seen.add(lowered)
        # NOTE: Config parameters have no dynamic values and are handled
        # separately, so they are skipped here.
        if param.IS_CONFIG_PARAMETER:
            continue

        options = param.kwargs
        declared_type = options.get("type")
        extras = {}
        if declared_type == JSONType:
            type_name = str(declared_type.name)
        elif isinstance(declared_type, FilePathClass):
            type_name = str(declared_type.name)
            extras["is_text"] = getattr(declared_type, "_is_text", True)
            extras["encoding"] = getattr(declared_type, "_encoding", "utf-8")
        else:
            type_name = str(declared_type.__name__)

        required = options.get("required", False)
        # A scheduled flow cannot have required parameters without
        # defaults: there is no notion of data triggers in Argo Workflows.
        if "default" not in options and required and scheduled:
            raise MetaflowException(
                "The parameter *%s* does not have a default and is required. "
                "Scheduling such parameters via Argo CronWorkflows is not "
                "currently supported." % param.name
            )

        default = deploy_time_eval(options.get("default"))
        # JSON parameters with a non-string default are serialized once
        # here; the encoding below then adds the outer quotes.
        if (
            default is not None
            and type_name == "JSON"
            and not isinstance(default, str)
        ):
            default = json.dumps(default)
        # Every default, including None, is JSON-encoded to please
        # argo-workflows and argo sensors. For None this has the side
        # effect of the value arriving as the string null during execution.
        default = json.dumps(default)

        entry = {
            "python_var_name": var,
            "name": param.name,
            "value": default,
            "type": type_name,
            "description": options.get("help"),
            "is_required": required,
        }
        entry.update(extras)
        described[param.name] = entry
    return described
def _process_config_parameters(self):
parameters = []
seen = set()
for var, param in self.flow._get_parameters():
if not param.IS_CONFIG_PARAMETER:
continue
# Throw an exception if the parameter is specified twice.
norm = param.name.lower()
if norm in seen:
raise MetaflowException(
"Parameter *%s* is specified twice. "
"Note that parameter names are "
"case-insensitive." % param.name
)
seen.add(norm)
parameters.append(
dict(name=param.name, kv_name=ConfigInput.make_key_name(param.name))
)
return parameters
def _process_triggers(self):
    """
    Collect the event triggers declared with `@trigger` or
    `@trigger_on_finish`.

    Returns
    -------
    tuple
        `(triggers, options)`. `triggers` is a list of event dicts, each
        extended with a `sanitized_name`; `options` is the `options`
        attribute of the decorator in use, or None without a decorator.

    Raises
    ------
    ArgoWorkflowsException
        If both decorators are used together, an event name or payload key
        contains disallowed characters, a mapped parameter is unknown or
        mapped twice, or only one of project and branch is known for a
        `@trigger_on_finish` event.

    Note that the `@trigger` branch rewrites the decorator's own event
    dicts in place (`parameters`, `type`).
    """
    # Impute triggers for Argo Workflow Template specified through @trigger and
    # @trigger_on_finish decorators

    # Disallow usage of @trigger and @trigger_on_finish together for now.
    if self.flow._flow_decorators.get("trigger") and self.flow._flow_decorators.get(
        "trigger_on_finish"
    ):
        raise ArgoWorkflowsException(
            "Argo Workflows doesn't support both *@trigger* and "
            "*@trigger_on_finish* decorators concurrently yet. Use one or the "
            "other for now."
        )
    triggers = []
    options = None

    # @trigger decorator
    if self.flow._flow_decorators.get("trigger"):
        # Parameters are not duplicated, and exist in the flow. Additionally,
        # convert them to lower case since Metaflow parameters are case
        # insensitive.
        seen = set()
        # NOTE: We skip config parameters as their values can not be set through event payloads
        params = set(
            [
                param.name.lower()
                for var, param in self.flow._get_parameters()
                if not param.IS_CONFIG_PARAMETER
            ]
        )
        trigger_deco = self.flow._flow_decorators.get("trigger")[0]
        trigger_deco.format_deploytime_value()
        for event in trigger_deco.triggers:
            parameters = {}
            # TODO: Add a check to guard against names starting with numerals(?)
            if not re.match(r"^[A-Za-z0-9_.-]+$", event["name"]):
                raise ArgoWorkflowsException(
                    "Invalid event name *%s* in *@trigger* decorator. Only "
                    "alphanumeric characters, underscores(_), dashes(-) and "
                    "dots(.) are allowed." % event["name"]
                )
            # `key` is the flow parameter name, `value` the payload key it
            # is read from; `seen` spans all events of the decorator.
            for key, value in event.get("parameters", {}).items():
                if not re.match(r"^[A-Za-z0-9_]+$", value):
                    raise ArgoWorkflowsException(
                        "Invalid event payload key *%s* for event *%s* in "
                        "*@trigger* decorator. Only alphanumeric characters and "
                        "underscores(_) are allowed." % (value, event["name"])
                    )
                if key.lower() not in params:
                    raise ArgoWorkflowsException(
                        "Parameter *%s* defined in the event mappings for "
                        "*@trigger* decorator not found in the flow." % key
                    )
                if key.lower() in seen:
                    raise ArgoWorkflowsException(
                        "Duplicate entries for parameter *%s* defined in the "
                        "event mappings for *@trigger* decorator." % key.lower()
                    )
                seen.add(key.lower())
                parameters[key.lower()] = value
            event["parameters"] = parameters
            event["type"] = "event"
        triggers.extend(self.flow._flow_decorators.get("trigger")[0].triggers)

        # Set automatic parameter mapping iff only a single event dependency is
        # specified with no explicit parameter mapping.
        if len(triggers) == 1 and not triggers[0].get("parameters"):
            triggers[0]["parameters"] = dict(zip(params, params))
        options = self.flow._flow_decorators.get("trigger")[0].options

    # @trigger_on_finish decorator
    if self.flow._flow_decorators.get("trigger_on_finish"):
        trigger_on_finish_deco = self.flow._flow_decorators.get(
            "trigger_on_finish"
        )[0]
        trigger_on_finish_deco.format_deploytime_value()
        for event in trigger_on_finish_deco.triggers:
            # Actual filters are deduced here since we don't have access to
            # the current object in the @trigger_on_finish decorator.
            project_name = event.get("project") or current.get("project_name")
            branch_name = event.get("branch") or current.get("branch_name")
            # validate that we have complete project info for an event name
            if project_name or branch_name:
                if not (project_name and branch_name):
                    # if one of the two is missing, we would end up listening to an event that will never be broadcast.
                    raise ArgoWorkflowsException(
                        "Incomplete project info. Please specify both 'project' and 'project_branch' or use the @project decorator"
                    )

            triggers.append(
                {
                    # Make sure this remains consistent with the event name format
                    # in ArgoWorkflowsInternalDecorator.
                    "name": "metaflow.%s.end"
                    % ".".join(
                        v
                        for v in [
                            project_name,
                            branch_name,
                            event["flow"],
                        ]
                        if v
                    ),
                    "filters": {
                        "auto-generated-by-metaflow": True,
                        "project_name": project_name,
                        "branch_name": branch_name,
                        # TODO: Add a time filters to guard against cached events
                    },
                    "type": "run",
                    "flow": event["flow"],
                }
            )
        options = self.flow._flow_decorators.get("trigger_on_finish")[0].options

    for event in triggers:
        # Assign a sanitized name since we need this at many places to please
        # Argo Events sensors. There is a slight possibility of name collision
        # but quite unlikely for us to worry about at this point.
        # The result is the event name without ".", "-", "@" and "+", then
        # "_" and the first four lower-cased characters of the base32-encoded
        # SHA-1 digest of the original name.
        event["sanitized_name"] = "%s_%s" % (
            event["name"]
            .replace(".", "")
            .replace("-", "")
            .replace("@", "")
            .replace("+", ""),
            to_unicode(base64.b32encode(sha1(to_bytes(event["name"])).digest()))[
                :4
            ].lower(),
        )
    return triggers, options
def _compile_workflow_template(self):
    """
    Compile this flow into an Argo WorkflowTemplate object.

    The returned WorkflowTemplate wraps a WorkflowSpec that carries every
    template of the flow: the top-level DAG template(s), the container
    templates, the lifecycle hook templates, the exit hook templates and the
    daemon (sidecar) templates.

    Steps of the FlowSpec are represented as DAGTasks. A DAGTask refers
    either to a ContainerTemplate (e.g. for linear steps) or to another
    DAGTemplate (for nested `foreach`s). Container templates are declared at
    the top level (in the WorkflowSpec) and only referenced from the
    DAGTasks, because Argo variable substitution ({{pod.name}}) does not
    work as expected within DAGTasks
    (https://github.com/argoproj/argo-workflows/issues/7432).

    Side effect: if a schedule is set and the timezone is None,
    `self._timezone` is replaced by an empty string.
    """

    def _notification_targets():
        # JSON flags describing which notification integrations are set up.
        return json.dumps(
            {
                "slack": bool(self.notify_slack_webhook_url),
                "pager_duty": bool(self.notify_pager_duty_integration_key),
                "incident_io": bool(self.notify_incident_io_api_key),
            }
        )

    annotations = {}
    if self._schedule is not None:
        # The timezone is optional; json.dumps would emit null for None, so
        # an empty string is stored instead.
        if self._timezone is None:
            self._timezone = ""
        annotations["metaflow/cron"] = json.dumps(
            {"schedule": self._schedule, "tz": self._timezone}
        )
    if self.parameters:
        annotations["metaflow/parameters"] = json.dumps(self.parameters)
    # Additional annotations so that the Argo UI is populated nicely.
    if self.tags:
        annotations["metaflow/tags"] = json.dumps(self.tags)
    if self.triggers:
        trigger_summaries = []
        for trigger in self.triggers:
            trigger_summaries.append(
                {"name": trigger.get("name"), "type": trigger.get("type")}
            )
        annotations["metaflow/triggers"] = json.dumps(trigger_summaries)
        annotations["metaflow/sensor_name"] = ArgoWorkflows._sensor_name(self.name)
        annotations["metaflow/sensor_namespace"] = ARGO_EVENTS_SENSOR_NAMESPACE
    if self.notify_on_error:
        annotations["metaflow/notify_on_error"] = _notification_targets()
    if self.notify_on_success:
        annotations["metaflow/notify_on_success"] = _notification_targets()

    try:
        # Derive the DAG from the DAGNodes that the FlowGraph reports for
        # the FlowSpec class.
        _steps_info, graph_structure = self.graph.output_steps()
        # Only the graph_structure is needed for now; nothing else is added
        # in order to stay within annotation size limits.
        graph_info = {"graph_structure": graph_structure}
    except Exception:
        graph_info = None
    dag_annotation = {"metaflow/dag": json.dumps(graph_info)}

    lifecycle_hooks = self._lifecycle_hooks()

    run_id_annotation = {"metaflow/run_id": "argo-{{workflow.name}}"}
    # Custom title/description are not copied onto workflows (runs), since
    # they make it harder to find specific runs.
    annotations_hidden_on_runs = [
        "workflows.argoproj.io/title",
        "workflows.argoproj.io/description",
    ]

    # ---- WorkflowTemplate metadata ----
    workflow_template = WorkflowTemplate()
    template_meta = ObjectMeta().name(self.name)
    # Argo currently only supports Workflow-level namespaces. Once v3.4.0 is
    # released, multi-namespace / multi-cluster scheduling should become
    # possible.
    template_meta = template_meta.namespace(KUBERNETES_NAMESPACE)
    template_meta = template_meta.annotations(annotations)
    template_meta = template_meta.annotations(self._base_annotations)
    template_meta = template_meta.labels(self._base_labels)
    template_meta = template_meta.label("app.kubernetes.io/name", "metaflow-flow")
    template_meta = template_meta.annotations(dag_annotation)
    workflow_template = workflow_template.metadata(template_meta)

    # ---- WorkflowSpec ----
    spec = WorkflowSpec()
    # Overall workflow timeout.
    spec = spec.active_deadline_seconds(self.workflow_timeout)
    # TODO: Allow Argo to optionally archive all workflow execution logs
    #       (.archive_logs()). Disabled for now since it requires every Argo
    #       installation to enable an artifactory repository. If log
    #       archival is enabled in the workflow controller, the logs of this
    #       workflow are archived automatically.
    # Service tokens are not automounted for now -
    # https://github.com/kubernetes/kubernetes/issues/16779#issuecomment-159656641
    # TODO: Service account names are currently set in the templates. The
    #       default service account name could be specified here
    #       (.automount_service_account_token()) to shrink the generated
    #       YAML by a tiny bit.
    # TODO: Support ImagePullSecrets for Argo & Kubernetes
    #       (.image_pull_secrets(...)). Not strictly needed since a valid
    #       workaround exists:
    #       https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#add-imagepullsecrets-to-a-service-account
    # Cap the workflow parallelism.
    spec = spec.parallelism(self.max_workers)
    # TODO: Support Prometheus metrics for Argo (.metrics(...)).
    # TODO: Support PodGC and DisruptionBudgets.
    spec = spec.priority(self.workflow_priority)

    # Workflow (run) level metadata.
    run_metadata = (
        Metadata()
        .labels(self._base_labels)
        .label("app.kubernetes.io/name", "metaflow-run")
    )
    run_annotations = dict(annotations)
    for key, value in self._base_annotations.items():
        if key not in annotations_hidden_on_runs:
            run_annotations[key] = value
    run_annotations.update(run_id_annotation)
    # TODO: Set dynamic labels using labels_from. Ideally run_id would be
    #       exposed as a label. Labels are easy to add but very difficult to
    #       remove, so stay conservative and only add labels once use-cases
    #       for them come up.
    spec = spec.workflow_metadata(run_metadata.annotations(run_annotations))

    # Workflow arguments: the flow parameters followed by one parameter per
    # trigger event.
    workflow_arguments = Arguments()
    flow_parameters = []
    for parameter in self.parameters.values():
        # TODO: Better handle IncludeFile in Argo Workflows UI.
        flow_parameters.append(
            Parameter(parameter["name"])
            .value(parameter["value"])
            .description(parameter.get("description"))
        )
    # Non-required parameters for Argo Events, so that the whole event
    # payload is accessible within the run. The parameter name is hashed so
    # that it can not collide with Metaflow parameters.
    event_parameters = []
    for event in self.triggers:
        event_parameters.append(
            Parameter(event["sanitized_name"])
            .value(json.dumps(None))  # None in Argo Workflows world.
            .description("auto-set by metaflow. safe to ignore.")
        )
    spec = spec.arguments(
        workflow_arguments.parameters(flow_parameters + event_parameters)
    )

    # Metadata shared by all pods. Pods of the workflow carry the run_id as
    # an annotation as well.
    pod_metadata = (
        Metadata()
        .labels(self._base_labels)
        .label("app.kubernetes.io/name", "metaflow-task")
    )
    pod_annotations = {}
    pod_annotations.update(annotations)
    pod_annotations.update(self._base_annotations)
    pod_annotations.update(run_id_annotation)
    spec = spec.pod_metadata(pod_metadata.annotations(pod_annotations))

    # The entrypoint is named after the flow.
    spec = spec.entrypoint(self.flow.name)

    # OnExit hook, only when error message capture is enabled.
    if self.enable_error_msg_capture:
        exit_handler = "capture-error-hook-fn-preflight"
    else:
        exit_handler = None
    spec = spec.onExit(exit_handler)

    # Lifecycle hooks (present when notifications are enabled), by name.
    hooks_by_name = {}
    for hook in lifecycle_hooks:
        for lifecycle in hook.lifecycle_hooks:
            hooks_by_name[lifecycle.name] = lifecycle
    spec = spec.hooks(hooks_by_name)

    # Top-level DAG template(s)
    spec = spec.templates(self._dag_templates())
    # Container templates
    spec = spec.templates(self._container_templates())
    # Lifecycle hook template(s)
    spec = spec.templates([hook.template for hook in lifecycle_hooks])
    # Exit hook template(s)
    spec = spec.templates(self._exit_hook_templates())
    # Sidecar templates (Daemon Containers)
    spec = spec.templates(self._daemon_templates())

    return workflow_template.spec(spec)
# Visit every node and record information on conditional step structure
def _parse_conditional_branches(self):
# Analyse the flow graph for conditional ("split-switch") structure and store
# the results on the instance. Nothing is returned. The attributes set are:
#   - self.conditional_nodes: names of steps that are reached through a
#     split-switch and whose conditional status has not been cleaned up
#   - self.conditional_join_nodes: names of steps found to close all branches
#     of at least one split-switch
#   - self.matching_conditional_join_dict: split-switch name -> name of a step
#     that closes its branches (a later match overwrites an earlier one)
#   - self.recursive_nodes: names of split-switch steps that list themselves
#     among their own out_funcs
# The work is done in two passes over self.graph: the first records, per step,
# the enclosing split-switches and the conditional branch steps leading to it;
# the second uses in_funcs to detect conditional joins and to clear the
# conditional status of everything downstream of a fully closed conditional.
self.conditional_nodes = set()
self.conditional_join_nodes = set()
self.matching_conditional_join_dict = {}
self.recursive_nodes = set()
# Scratch state shared by the helpers below, keyed by step name:
#   node_conditional_parents: list of enclosing split-switch names, in the
#     order they were entered
#   node_conditional_branches: de-duplicated list of step names seen on the
#     conditional paths leading to the step (the step itself included)
node_conditional_parents = {}
node_conditional_branches = {}
def _visit(node, conditional_branch, conditional_parents=None):
# Depth-first walk that only does work for split-switch nodes and for nodes
# reached from one. Lists are extended with `+`, which builds new lists, so
# sibling branches never share state.
if not node.type == "split-switch" and not (
conditional_branch and conditional_parents
):
# skip regular non-conditional nodes entirely
return
if node.type == "split-switch":
conditional_branch = conditional_branch + [node.name]
# merge this path into whatever earlier visits recorded, keeping order and dropping duplicates
c_br = node_conditional_branches.get(node.name, [])
node_conditional_branches[node.name] = c_br + [
b for b in conditional_branch if b not in c_br
]
# the split-switch becomes the latest conditional parent for everything below it
conditional_parents = (
[node.name]
if not conditional_parents
else conditional_parents + [node.name]
)
node_conditional_parents[node.name] = conditional_parents
# check for recursion. this split is recursive if any of its out functions are itself.
if any(
out_func for out_func in node.out_funcs if out_func == node.name
):
self.recursive_nodes.add(node.name)
if conditional_parents and not node.type == "split-switch":
# a regular step below a split-switch: record its parents and branch, and mark it as conditional
node_conditional_parents[node.name] = conditional_parents
conditional_branch = conditional_branch + [node.name]
c_br = node_conditional_branches.get(node.name, [])
node_conditional_branches[node.name] = c_br + [
b for b in conditional_branch if b not in c_br
]
self.conditional_nodes.add(node.name)
if conditional_branch and conditional_parents:
for n in node.out_funcs:
child = self.graph[n]
# do not follow the self-edge of a recursive split-switch
if child.name == node.name:
continue
_visit(child, conditional_branch, conditional_parents)
# First we visit all nodes to determine conditional parents and branches
for n in self.graph:
_visit(n, [])
# helper to clean up conditional info for all children of a node, until a new split-switch is encountered.
def _cleanup_conditional_status(node_name, seen):
if self.graph[node_name].type == "split-switch":
# stop recursive cleanup if we hit a new split-switch
return
if node_name in self.conditional_nodes:
self.conditional_nodes.remove(node_name)
# reset the scratch state so the second pass treats this step as non-conditional
node_conditional_parents[node_name] = []
node_conditional_branches[node_name] = []
for p in self.graph[node_name].out_funcs:
# `seen` holds the steps already followed on the current path
if p not in seen:
_cleanup_conditional_status(p, seen + [p])
# Then we traverse again in order to determine conditional join nodes, and matching conditional join info
for node in self.graph:
# only steps that still have conditional parents recorded are considered
if node_conditional_parents.get(node.name, False):
# do the required postprocessing for anything requiring node.in_funcs
# check that in previous parsing we have not closed all conditional in_funcs.
# If so, this step can not be conditional either
is_conditional = any(
in_func in self.conditional_nodes
or self.graph[in_func].type == "split-switch"
for in_func in node.in_funcs
)
if is_conditional:
self.conditional_nodes.add(node.name)
else:
if node.name in self.conditional_nodes:
self.conditional_nodes.remove(node.name)
# does this node close the latest conditional parent branches?
# only in_funcs that still carry conditional branch information take part in the check
conditional_in_funcs = [
in_func
for in_func in node.in_funcs
if node_conditional_branches.get(in_func, False)
]
closed_conditional_parents = []
# walk the enclosing split-switches from the most recently entered one outwards
for last_split_switch in node_conditional_parents.get(node.name, [])[
::-1
]:
last_conditional_split_nodes = self.graph[
last_split_switch
].out_funcs
# NOTE: How do we define a conditional join step?
# The idea here is that we check if the conditional branches (e.g. chains of conditional steps leading to) of all the in_funcs
# manage to tick off every step name that follows a split-switch
# For example, consider the following structure
# switch_step -> A, B, C
# A -> A2 -> A3 -> A4 -> B2
# B -> B2 -> B3 -> C3
# C -> C2 -> C3 -> end
#
# if we look at the in_funcs for C3, they are (C2, B3)
# B3 closes off branches started by A and B
# C2 closes off branches started by C
# therefore C3 is a conditional join step for the 'switch_step'
# NOTE: Then what about a skip step?
# some switch cases might not introduce any distinct steps of their own, opting to instead skip ahead to a later common step.
# Example:
# switch_step -> A, B, C
# A -> A1 -> B2 -> C
# B -> B1 -> B2 -> C
#
# In this case, C is a skip step as it does not add any conditional branching of its own.
# C is also a conditional join, as it closes all branches started by 'switch_step'
closes_branches = all(
(
# branch_root_node_name needs to be in at least one conditional_branch for it to be closed.
any(
branch_root_node_name
in node_conditional_branches.get(in_func, [])
for in_func in conditional_in_funcs
)
# need to account for a switch case skipping completely, not having a conditional-branch of its own.
if branch_root_node_name != node.name
else True
)
for branch_root_node_name in last_conditional_split_nodes
)
if closes_branches:
closed_conditional_parents.append(last_split_switch)
self.conditional_join_nodes.add(node.name)
# remember this step as the join matching the split-switch it closes
self.matching_conditional_join_dict[last_split_switch] = (
node.name
)
# Did we close all conditionals? Then this branch and all its children are not conditional anymore (unless a new conditional branch is encountered).
if not [
p
for p in node_conditional_parents.get(node.name, [])
if p not in closed_conditional_parents
]:
_cleanup_conditional_status(node.name, [])
def _is_conditional_node(self, node):
return node.name in self.conditional_nodes
def _is_conditional_skip_node(self, node):
return (
self._is_conditional_node(node)
and any(
self.graph[in_func].type == "split-switch" for in_func in node.in_funcs
)
and len(
[
in_func
for in_func in node.in_funcs
if self._is_conditional_node(self.graph[in_func])
or self.graph[in_func].type == "split-switch"
]
)
> 1
)
def _is_conditional_join_node(self, node):
return node.name in self.conditional_join_nodes
def _many_in_funcs_all_conditional(self, node):
cond_in_funcs = [
in_func
for in_func in node.in_funcs
if self._is_conditional_node(self.graph[in_func])
]
return len(cond_in_funcs) > 1 and len(cond_in_funcs) == len(node.in_funcs)
def _is_recursive_node(self, node):
return node.name in self.recursive_nodes
def _matching_conditional_join(self, node):
# If no earlier conditional join step is found during parsing, then 'end' is always one.
return self.matching_conditional_join_dict.get(node.name, "end")
# Visit every node and yield the uber DAGTemplate(s).
def _dag_templates(self):
def _visit(
node,
exit_node=None,
templates=None,
dag_tasks=None,
parent_foreach=None,
seen=None,
): # Returns Tuple[List[Template], List[DAGTask]]
""" """
# Every for-each node results in a separate subDAG and an equivalent
# DAGTemplate rooted at the child of the for-each node. Each DAGTemplate
# has a unique name - the top-level DAGTemplate is named as the name of
# the flow and the subDAG DAGTemplates are named after the (only) descendant
# of the for-each node.
# Emit if we have reached the end of the sub workflow
if seen is None:
seen = []
if dag_tasks is None:
dag_tasks = []
if templates is None:
templates = []
if exit_node is not None and exit_node is node.name:
return templates, dag_tasks
if node.name in seen:
return templates, dag_tasks
seen.append(node.name)
# helper variable for recursive conditional inputs
has_foreach_inputs = False
if node.name == "start":
# Start node has no dependencies.
dag_task = DAGTask(self._sanitize(node.name)).template(
self._sanitize(node.name)
)
if (
node.is_inside_foreach
and self.graph[node.in_funcs[0]].type == "foreach"
and not self.graph[node.in_funcs[0]].parallel_foreach
# We need to distinguish what is a "regular" foreach (i.e something that doesn't care about to gang semantics)
# vs what is a "num_parallel" based foreach (i.e. something that follows gang semantics.)
# A `regular` foreach is basically any arbitrary kind of foreach.
):
# helper variable for recursive conditional inputs
has_foreach_inputs = True
# Child of a foreach node needs input-paths as well as split-index
# This child is the first node of the sub workflow and has no dependency
parameters = [
Parameter("input-paths").value("{{inputs.parameters.input-paths}}"),
Parameter("split-index").value("{{inputs.parameters.split-index}}"),
]
dag_task = (
DAGTask(self._sanitize(node.name))
.template(self._sanitize(node.name))
.arguments(Arguments().parameters(parameters))
)
elif node.parallel_step:
# This is the step where the @parallel decorator is defined.
# Since this DAGTask will call the for the `resource` [based templates]
# (https://argo-workflows.readthedocs.io/en/stable/walk-through/kubernetes-resources/)
# we have certain constraints on the way we can pass information inside the Jobset manifest
# [All templates will have access](https://argo-workflows.readthedocs.io/en/stable/variables/#all-templates)
# to the `inputs.parameters` so we will pass down ANY/ALL information using the
# input parameters.
# We define the usual parameters like input-paths/split-index etc. but we will also
# define the following:
# - `workerCount`: parameter which will be used to determine the number of
# parallel worker jobs
# - `jobset-name`: parameter which will be used to determine the name of the jobset.
# This parameter needs to be dynamic so that when we have retries we don't
# end up using the name of the jobset again (if we do, it will crash since k8s wont allow duplicated job names)
# - `retryCount`: parameter which will be used to determine the number of retries
# This parameter will *only* be available within the container templates like we
# have it for all other DAGTasks and NOT for custom kubernetes resource templates.
# So as a work-around, we will set it as the `retryCount` parameter instead of
# setting it as a {{ retries }} in the CLI code. Once set as a input parameter,
# we can use it in the Jobset Manifest templates as `{{inputs.parameters.retryCount}}`
# - `task-id-entropy`: This is a parameter which will help derive task-ids and jobset names. This parameter
# contains the relevant amount of entropy to ensure that task-ids and jobset names
# are uniquish. We will also use this in the join task to construct the task-ids of
# all parallel tasks since the task-ids for parallel task are minted formulaically.
parameters = [
Parameter("input-paths").value("{{inputs.parameters.input-paths}}"),
Parameter("num-parallel").value(
"{{inputs.parameters.num-parallel}}"
),
Parameter("split-index").value("{{inputs.parameters.split-index}}"),
Parameter("task-id-entropy").value(
"{{inputs.parameters.task-id-entropy}}"
),
# we cant just use hyphens with sprig.
# https://github.com/argoproj/argo-workflows/issues/10567#issuecomment-1452410948
Parameter("workerCount").value(
"{{=sprig.int(sprig.sub(sprig.int(inputs.parameters['num-parallel']),1))}}"
),
]
# Resolve retry strategy to determine if we should add retry-related parameters.
# {{retries}} is only available if retryStrategy is specified in the template.
max_user_code_retries = 0
max_error_retries = 0
for decorator in node.decorators:
user_code_retries, error_retries = decorator.step_task_retry_count()
max_user_code_retries = max(
max_user_code_retries, user_code_retries
)
max_error_retries = max(max_error_retries, error_retries)
total_retries = max_user_code_retries + max_error_retries
if total_retries > 0:
parameters.extend(
[
Parameter("retryCount").value("{{retries}}"),
# The job-setname needs to be unique for each retry
# and we cannot use the `generateName` field in the
# Jobset Manifest since we need to construct the subdomain
# and control pod domain name pre-hand. So we will use
# the retry count to ensure that the jobset name is unique
Parameter("jobset-name").value(
"js-{{inputs.parameters.task-id-entropy}}{{retries}}",
),
]
)
else:
parameters.extend(
[
Parameter("jobset-name").value(
"js-{{inputs.parameters.task-id-entropy}}",
)
]
)
dag_task = (
DAGTask(self._sanitize(node.name))
.template(self._sanitize(node.name))
.arguments(Arguments().parameters(parameters))
)
else:
# Every other node needs only input-paths
parameters = [
Parameter("input-paths").value(
compress_list(
[
"argo-{{workflow.name}}/%s/{{tasks.%s.outputs.parameters.task-id}}"
% (n, self._sanitize(n))
for n in node.in_funcs
],
# NOTE: We set zlibmin to infinite because zlib compression for the Argo input-paths breaks template value substitution.
zlibmin=inf,
)
)
]
# NOTE: Due to limitations with Argo Workflows Parameter size we
# can not pass arbitrarily large lists of task id's to join tasks.
# Instead we ensure that task id's for foreach tasks can be
# deduced deterministically and pass the relevant information to
# the join task.
#
# We need to add the split-index and root-input-path for the last
# step in any foreach scope and use these to generate the task id,
# as the join step uses the root and the cardinality of the
# foreach scope to generate the required id's.
if (
node.is_inside_foreach
and self.graph[node.out_funcs[0]].type == "join"
):
if any(
self.graph[parent].matching_join
== self.graph[node.out_funcs[0]].name
and self.graph[parent].type == "foreach"
for parent in self.graph[node.out_funcs[0]].split_parents
):
parameters.extend(
[
Parameter("split-index").value(
"{{inputs.parameters.split-index}}"
),
Parameter("root-input-path").value(
"{{inputs.parameters.input-paths}}"
),
]
)
conditional_deps = [
"%s.Succeeded" % self._sanitize(in_func)
for in_func in node.in_funcs
if self._is_conditional_node(self.graph[in_func])
or self.graph[in_func].type == "split-switch"
]
required_deps = [
"%s.Succeeded" % self._sanitize(in_func)
for in_func in node.in_funcs
if not self._is_conditional_node(self.graph[in_func])
and self.graph[in_func].type != "split-switch"
]
if self._is_conditional_skip_node(
node
) or self._many_in_funcs_all_conditional(node):
# skip nodes need unique condition handling
conditional_deps = [
"%s.Succeeded" % self._sanitize(in_func)
for in_func in node.in_funcs
]
required_deps = []
# join steps in_funcs need special handling, as there can be disjoint sets of always-executing and conditional branches.
if node.type == "join" and any(
self._is_conditional_node(self.graph[fn]) for fn in node.in_funcs
):
def _split_switch_ancestors(step_name, first_ancestor):
acc = []
for in_fn in self.graph[step_name].in_funcs:
if self.graph[in_fn].type == "split-switch":
acc.append(in_fn)
if not in_fn == first_ancestor:
acc.extend(
_split_switch_ancestors(in_fn, first_ancestor)
)
return acc
node_groups = {}
node_switch_ancestors = {}
for fn in node.in_funcs:
if self.graph[fn].split_branches:
# This is the latest split in the DAG.
last_split = self.graph[fn].split_branches[-1]
switch_ancestors = _split_switch_ancestors(
fn, node.split_parents[-1]
)
if switch_ancestors:
node_switch_ancestors[fn] = switch_ancestors
new_funcs = node_groups.get(last_split, [])
new_funcs.append(fn)
node_groups[last_split] = new_funcs
def build_ancestor_tree(node_groups, switch_ancestors):
result = {}
for parent, children in node_groups.items():
nodes = [
n
for g in children
for n in (g if isinstance(g, list) else [g])
]
# Group nodes by their ancestor set
by_anc = defaultdict(list)
for n in nodes:
by_anc[frozenset(switch_ancestors.get(n, []))].append(n)
# Sort from most specific (most ancestors) to least
groups = sorted(
by_anc.items(), key=lambda x: len(x[0]), reverse=True
)
# Greedily build chains: add to a chain if this key is a subset of its first (largest) key
chains = []
for key, grp in groups:
for chain in chains:
if key <= chain[0][0]:
chain.append((key, grp))
break
else:
chains.append([(key, grp)])
result[parent] = [[g for _, g in chain] for chain in chains]
return result
if node_groups:
conditional_deps = []
required_deps = []
for parent, chains in build_ancestor_tree(
node_groups, node_switch_ancestors
).items():
parts = []
for chain in chains:
groups = [
"({})".format(
" || ".join(
"%s.Succeeded" % self._sanitize(g)
for g in grp
)
)
for grp in chain
]
parts.append("({})".format(" || ".join(groups)))
required_deps.append("&&".join(parts))
both_conditions = required_deps and conditional_deps
depends_str = "{required}{_and}{conditional}".format(
required=("(%s)" if both_conditions else "%s")
% " && ".join(required_deps),
_and=" && " if both_conditions else "",
conditional=("(%s)" if both_conditions else "%s")
% " || ".join(conditional_deps),
)
dag_task = (
DAGTask(self._sanitize(node.name))
.depends(depends_str)
.template(self._sanitize(node.name))
.arguments(Arguments().parameters(parameters))
)
# Add conditional if this is the first step in a conditional branch
switch_in_funcs = [
in_func
for in_func in node.in_funcs
if self.graph[in_func].type == "split-switch"
]
if (
self._is_conditional_node(node)
or self._is_conditional_skip_node(node)
or self._is_conditional_join_node(node)
) and switch_in_funcs:
# It is possible that the some of the leading steps did not execute at all. In this case the switch-step output would be missing and needs to be accounted for.
# NOTE: Due to an issue in Argo Workflows 'when' clauses, we can not use ternaries or 'safe' getters directly on a tasks['step-name'] due to this leading to errors when the step has not executed.
conditional_when = "||".join(
[
"({{=(tasks['%s'].status == 'Succeeded' ? tasks['%s'].outputs.parameters['switch-step'] : nil) == '%s'}})"
% (
self._sanitize(switch_in_func),
self._sanitize(switch_in_func),
node.name,
)
for switch_in_func in switch_in_funcs
]
)
non_switch_in_funcs = [
in_func
for in_func in node.in_funcs
if in_func not in switch_in_funcs
]
status_when = ""
if non_switch_in_funcs:
status_when = "||".join(
[
"{{tasks.%s.status}}==Succeeded"
% self._sanitize(in_func)
for in_func in non_switch_in_funcs
]
)
total_when = (
f"({status_when}) || ({conditional_when})"
if status_when
else conditional_when
)
dag_task.when(total_when)
dag_tasks.append(dag_task)
# End the workflow if we have reached the end of the flow
if node.type == "end":
return templates, dag_tasks
# For split nodes traverse all the children
if node.type == "split":
for n in node.out_funcs:
_visit(
self.graph[n],
node.matching_join,
templates,
dag_tasks,
parent_foreach,
seen,
)
return _visit(
self.graph[node.matching_join],
exit_node,
templates,
dag_tasks,
parent_foreach,
seen,
)
elif node.type == "split-switch":
if self._is_recursive_node(node):
# we need an additional recursive template if the step is recursive
# NOTE: in the recursive case, the original step is renamed in the container templates to 'recursive-'
# so that we do not have to touch the step references in the DAG.
#
# NOTE: The way that recursion in Argo Workflows is achieved is with the following structure:
# - the usual 'example-step' template which would match example_step in flow code is renamed to 'recursive-example-step'
# - templates has another template with the original task name: 'example-step'
# - the template 'example-step' in turn has steps
# - 'example-step-internal' which uses the metaflow step executing template 'recursive-example-step'
# - 'example-step-recursion' which calls the parent template 'example-step' if switch-step output from 'example-step-internal' matches the condition.
sanitized_name = self._sanitize(node.name)
templates.append(
Template(sanitized_name)
.steps(
[
WorkflowStep()
.name("%s-internal" % sanitized_name)
.template("recursive-%s" % sanitized_name)
.arguments(
Arguments().parameters(
[
Parameter("input-paths").value(
"{{inputs.parameters.input-paths}}"
)
]
# Add the additional inputs required by specific node types.
# We do not need to cover joins or @parallel, as a split-switch step can not be either one of these.
+ (
[
Parameter("split-index").value(
"{{inputs.parameters.split-index}}"
)
]
if has_foreach_inputs
else []
)
)
)
]
)
.steps(
[
WorkflowStep()
.name("%s-recursion" % sanitized_name)
.template(sanitized_name)
.when(
"{{steps.%s-internal.outputs.parameters.switch-step}}==%s"
% (sanitized_name, node.name)
)
.arguments(
Arguments().parameters(
[
Parameter("input-paths").value(
"argo-{{workflow.name}}/%s/{{steps.%s-internal.outputs.parameters.task-id}}"
% (node.name, sanitized_name)
)
]
+ (
[
Parameter("split-index").value(
"{{inputs.parameters.split-index}}"
)
]
if has_foreach_inputs
else []
)
)
),
]
)
.inputs(Inputs().parameters(parameters))
.outputs(
# NOTE: We try to read the output parameters from the recursive template call first (-recursion), and the internal step second (-internal).
# This guarantees that we always get the output parameters of the last recursive step that executed.
Outputs().parameters(
[
Parameter("task-id").valueFrom(
{
"expression": "(steps['%s-recursion']?.outputs ?? steps['%s-internal']?.outputs).parameters['task-id']"
% (sanitized_name, sanitized_name)
}
),
Parameter("switch-step").valueFrom(
{
"expression": "(steps['%s-recursion']?.outputs ?? steps['%s-internal']?.outputs).parameters['switch-step']"
% (sanitized_name, sanitized_name)
}
),
]
)
)
)
for n in node.out_funcs:
_visit(
self.graph[n],
self._matching_conditional_join(node),
templates,
dag_tasks,
parent_foreach,
seen,
)
return _visit(
self.graph[self._matching_conditional_join(node)],
exit_node,
templates,
dag_tasks,
parent_foreach,
seen,
)
# For foreach nodes generate a new sub DAGTemplate
# We do this for "regular" foreaches (ie. `self.next(self.a, foreach=)`)
elif node.type == "foreach":
foreach_template_name = self._sanitize(
"%s-foreach-%s"
% (
node.name,
"parallel" if node.parallel_foreach else node.foreach_param,
# Since foreach's are derived based on `self.next(self.a, foreach="")`
# vs @parallel foreach are done based on `self.next(self.a, num_parallel="")`,
# we need to ensure that `foreach_template_name` suffix is appropriately set based on the kind
# of foreach.
)
)
# There are two separate "DAGTask"s created for the foreach node.
# - The first one is a "jump-off" DAGTask where we propagate the
# input-paths and split-index. This thing doesn't create
# any actual containers and it responsible for only propagating
# the parameters.
# - The DAGTask that follows first DAGTask is the one
# that uses the ContainerTemplate. This DAGTask is named the same
# thing as the foreach node. We will leverage a similar pattern for the
# @parallel tasks.
#
foreach_task = (
DAGTask(foreach_template_name)
.depends(f"{self._sanitize(node.name)}.Succeeded")
.template(foreach_template_name)
.arguments(
Arguments().parameters(
[
Parameter("input-paths").value(
"argo-{{workflow.name}}/%s/{{tasks.%s.outputs.parameters.task-id}}"
% (node.name, self._sanitize(node.name))
),
Parameter("split-index").value("{{item}}"),
]
+ (
[
Parameter("root-input-path").value(
"argo-{{workflow.name}}/%s/{{tasks.%s.outputs.parameters.task-id}}"
% (node.name, self._sanitize(node.name))
),
]
if parent_foreach
else []
)
+ (
# Disabiguate parameters for a regular `foreach` vs a `@parallel` foreach
[
Parameter("num-parallel").value(
"{{tasks.%s.outputs.parameters.num-parallel}}"
% self._sanitize(node.name)
),
Parameter("task-id-entropy").value(
"{{tasks.%s.outputs.parameters.task-id-entropy}}"
% self._sanitize(node.name)
),
]
if node.parallel_foreach
else []
)
)
)
.with_param(
# For @parallel workloads `num-splits` will be explicitly set to one so that
# we can piggyback on the current mechanism with which we leverage argo.
"{{tasks.%s.outputs.parameters.num-splits}}"
% self._sanitize(node.name)
)
)
# Add conditional if this is the first step in a conditional branch
if self._is_conditional_node(node) and not any(
self._is_conditional_node(self.graph[in_func])
for in_func in node.in_funcs
):
in_func = node.in_funcs[0]
foreach_task.when(
"{{tasks.%s.outputs.parameters.switch-step}}==%s"
% (self._sanitize(in_func), node.name)
)
dag_tasks.append(foreach_task)
templates, dag_tasks_1 = _visit(
self.graph[node.out_funcs[0]],
node.matching_join,
templates,
[],
node.name,
seen,
)
# How do foreach's work on Argo:
# Lets say you have the following dag: (start[sets `foreach="x"`]) --> (task-a [actual foreach]) --> (join) --> (end)
# With argo we will :
# (start [sets num-splits]) --> (task-a-foreach-(0,0) [dummy task]) --> (task-a) --> (join) --> (end)
# The (task-a-foreach-(0,0) [dummy task]) propagates the values of the `split-index` and the input paths.
# to the actual foreach task.
templates.append(
Template(foreach_template_name)
.inputs(
Inputs().parameters(
[Parameter("input-paths"), Parameter("split-index")]
+ ([Parameter("root-input-path")] if parent_foreach else [])
+ (
[
Parameter("num-parallel"),
Parameter("task-id-entropy"),
# Parameter("workerCount")
]
if node.parallel_foreach
else []
)
)
)
.outputs(
Outputs().parameters(
[
# non @parallel tasks set task-ids as outputs
Parameter("task-id").valueFrom(
{
"parameter": "{{tasks.%s.outputs.parameters.task-id}}"
% self._sanitize(
self.graph[node.matching_join].in_funcs[0]
)
}
if not self._is_conditional_join_node(
self.graph[node.matching_join]
)
else
# Note: If the nodes leading to the join are conditional, then we need to use an expression to pick the outputs from the task that executed.
# ref for operators: https://github.com/expr-lang/expr/blob/master/docs/language-definition.md
{
"expression": "get((%s)?.parameters, 'task-id')"
% " ?? ".join(
f"tasks['{self._sanitize(func)}']?.outputs"
for func in self.graph[
node.matching_join
].in_funcs
)
}
),
]
if not node.parallel_foreach
else [
# @parallel tasks set `task-id-entropy` and `num-parallel`
# as outputs so task-ids can be derived in the join step.
# Both of these values should be propagated from the
# jobset labels.
Parameter("num-parallel").valueFrom(
{
"parameter": "{{tasks.%s.outputs.parameters.num-parallel}}"
% self._sanitize(
self.graph[node.matching_join].in_funcs[0]
)
}
),
Parameter("task-id-entropy").valueFrom(
{
"parameter": "{{tasks.%s.outputs.parameters.task-id-entropy}}"
% self._sanitize(
self.graph[node.matching_join].in_funcs[0]
)
}
),
]
)
)
.dag(DAGTemplate().fail_fast().tasks(dag_tasks_1))
)
join_foreach_task = (
DAGTask(self._sanitize(self.graph[node.matching_join].name))
.template(self._sanitize(self.graph[node.matching_join].name))
.depends(f"{foreach_template_name}.Succeeded")
.arguments(
Arguments().parameters(
(
[
Parameter("input-paths").value(
"argo-{{workflow.name}}/%s/{{tasks.%s.outputs.parameters.task-id}}"
% (node.name, self._sanitize(node.name))
),
Parameter("split-cardinality").value(
"{{tasks.%s.outputs.parameters.split-cardinality}}"
% self._sanitize(node.name)
),
]
if not node.parallel_foreach
else [
Parameter("num-parallel").value(
"{{tasks.%s.outputs.parameters.num-parallel}}"
% self._sanitize(node.name)
),
Parameter("task-id-entropy").value(
"{{tasks.%s.outputs.parameters.task-id-entropy}}"
% self._sanitize(node.name)
),
]
)
+ (
[
Parameter("split-index").value(
# TODO : Pass down these parameters to the jobset stuff.
"{{inputs.parameters.split-index}}"
),
Parameter("root-input-path").value(
"{{inputs.parameters.input-paths}}"
),
]
if parent_foreach
else []
)
)
)
)
dag_tasks.append(join_foreach_task)
return _visit(
self.graph[self.graph[node.matching_join].out_funcs[0]],
exit_node,
templates,
dag_tasks,
parent_foreach,
seen,
)
# For linear nodes continue traversing to the next node
if node.type in ("linear", "join", "start"):
return _visit(
self.graph[node.out_funcs[0]],
exit_node,
templates,
dag_tasks,
parent_foreach,
seen,
)
else:
raise ArgoWorkflowsException(
"Node type *%s* for step *%s* is not currently supported by "
"Argo Workflows." % (node.type, node.name)
)
# Generate daemon tasks
daemon_tasks = [
DAGTask("%s-task" % daemon_template.name).template(daemon_template.name)
for daemon_template in self._daemon_templates()
]
templates, dag_tasks = _visit(node=self.graph["start"], dag_tasks=daemon_tasks)
# Add the DAG template only after fully traversing the graph so we are guaranteed to have all the dag_tasks collected.
templates.append(
Template(self.flow.name).dag(DAGTemplate().fail_fast().tasks(dag_tasks))
)
return templates
# Visit every node and yield ContainerTemplates.
def _container_templates(self):
try:
# Kubernetes is a soft dependency for generating Argo objects.
# We can very well remove this dependency for Argo with the downside of
# adding a bunch more json bloat classes (looking at you... V1Container)
from kubernetes import client as kubernetes_sdk
except (NameError, ImportError):
raise MetaflowException(
"Could not import Python package 'kubernetes'. Install kubernetes "
"sdk (https://pypi.org/project/kubernetes/) first."
)
for node in self.graph:
# Resolve entry point for pod container.
script_name = os.path.basename(sys.argv[0])
executable = self.environment.executable(node.name)
# TODO: Support R someday. Quite a few people will be happy.
entrypoint = [executable, script_name]
# The values with curly braces '{{}}' are made available by Argo
# Workflows. Unfortunately, there are a few bugs in Argo which prevent
# us from accessing these values as liberally as we would like to - e.g,
# within inline templates - so we are forced to generate container templates
run_id = "argo-{{workflow.name}}"
# Unfortunately, we don't have any easy access to unique ids that remain
# stable across task attempts through Argo Workflows. So, we are forced to
# stitch them together ourselves. The task ids are a function of step name,
# split index and the parent task id (available from input path name).
# Ideally, we would like these task ids to be the same as node name
# (modulo retry suffix) on Argo Workflows but that doesn't seem feasible
# right now.
task_idx = ""
input_paths = ""
root_input = None
# export input_paths as it is used multiple times in the container script
# and we do not want to repeat the values.
input_paths_expr = "export INPUT_PATHS=''"
# If node is not a start step or a @parallel join then we will set the input paths.
# To set the input-paths as a parameter, we need to ensure that the node
# is not (a start node or a parallel join node). Start nodes will have no
# input paths and parallel join will derive input paths based on a
# formulaic approach using `num-parallel` and `task-id-entropy`.
if not (
node.name == "start"
or (node.type == "join" and self.graph[node.in_funcs[0]].parallel_step)
):
# For parallel joins we don't pass the INPUT_PATHS but are dynamically constructed.
# So we don't need to set the input paths.
input_paths_expr = (
"export INPUT_PATHS={{inputs.parameters.input-paths}}"
)
if (
(
self._is_conditional_join_node(node)
or self._many_in_funcs_all_conditional(node)
or self._is_conditional_skip_node(node)
)
and not (
node.type == "join"
and self.graph[node.split_parents[-1]].type == "foreach"
) # base64 encoding input-paths for foreach joins is unnecessary, as this is simply the task id of the splitting step.
and not (
node.is_inside_foreach
and self.graph[node.out_funcs[0]].type == "join"
) # do not base64 encode the input-paths of a step inside a foreach that leads to a join, as this would not match the task-id generation logic that the join relies on.
):
# NOTE: Argo template expressions that fail to resolve, output the expression itself as a value.
# With conditional steps, some of the input-paths are therefore 'broken' due to containing a nil expression
# e.g. "{{ tasks['A'].outputs.parameters.task-id }}" when task A never executed.
# We base64 encode the input-paths in order to not pollute the execution environment with templating expressions.
# NOTE: Adding conditionals that check if a key exists or not does not work either, due to an issue with how Argo
# handles tasks in a nested foreach (withParam template) leading to all such expressions getting evaluated as false.
input_paths_expr = "export INPUT_PATHS={{=toBase64(inputs.parameters['input-paths'])}}"
input_paths = "$(echo $INPUT_PATHS)"
if any(self.graph[n].type == "foreach" for n in node.in_funcs):
task_idx = "{{inputs.parameters.split-index}}"
if node.is_inside_foreach and self.graph[node.out_funcs[0]].type == "join":
if any(
self.graph[parent].matching_join
== self.graph[node.out_funcs[0]].name
for parent in self.graph[node.out_funcs[0]].split_parents
if self.graph[parent].type == "foreach"
) and any(not self.graph[f].type == "foreach" for f in node.in_funcs):
# we need to propagate the split-index and root-input-path info for
# the last step inside a foreach for correctly joining nested
# foreaches
task_idx = "{{inputs.parameters.split-index}}"
root_input = "{{inputs.parameters.root-input-path}}"
# Task string to be hashed into an ID
task_str = "-".join(
[
node.name,
"{{workflow.creationTimestamp}}",
root_input or input_paths,
task_idx,
]
)
if node.parallel_step:
task_str = "-".join(
[
"$TASK_ID_PREFIX",
"{{inputs.parameters.task-id-entropy}}",
"$TASK_ID_SUFFIX",
]
)
else:
# Generated task_ids need to be non-numeric - see register_task_id in
# service.py. We do so by prefixing `t-`
_task_id_base = (
"$(echo %s | md5sum | cut -d ' ' -f 1 | tail -c 9)" % task_str
)
task_str = "(t-%s)" % _task_id_base
task_id_expr = "export METAFLOW_TASK_ID=" "%s" % task_str
task_id = "$METAFLOW_TASK_ID"
# Resolve retry strategy.
max_user_code_retries = 0
max_error_retries = 0
minutes_between_retries = "2"
for decorator in node.decorators:
if decorator.name == "retry":
minutes_between_retries = decorator.attributes.get(
"minutes_between_retries", minutes_between_retries
)
user_code_retries, error_retries = decorator.step_task_retry_count()
max_user_code_retries = max(max_user_code_retries, user_code_retries)
max_error_retries = max(max_error_retries, error_retries)
user_code_retries = max_user_code_retries
total_retries = max_user_code_retries + max_error_retries
# {{retries}} is only available if retryStrategy is specified
# For custom kubernetes manifests, we will pass the retryCount as a parameter
# and use that in the manifest.
retry_count = (
(
"{{retries}}"
if not node.parallel_step
else "{{inputs.parameters.retryCount}}"
)
if total_retries
else 0
)
minutes_between_retries = int(minutes_between_retries)
# Configure log capture.
mflog_expr = export_mflog_env_vars(
datastore_type=self.flow_datastore.TYPE,
stdout_path="$PWD/.logs/mflog_stdout",
stderr_path="$PWD/.logs/mflog_stderr",
flow_name=self.flow.name,
run_id=run_id,
step_name=node.name,
task_id=task_id,
retry_count=retry_count,
)
init_cmds = " && ".join(
[
# For supporting sandboxes, ensure that a custom script is executed
# before anything else is executed. The script is passed in as an
# env var.
'${METAFLOW_INIT_SCRIPT:+eval \\"${METAFLOW_INIT_SCRIPT}\\"}',
"mkdir -p $PWD/.logs",
input_paths_expr,
task_id_expr,
mflog_expr,
]
+ self.environment.get_package_commands(
self.code_package_url,
self.flow_datastore.TYPE,
self.code_package_metadata,
)
)
step_cmds = self.environment.bootstrap_commands(
node.name, self.flow_datastore.TYPE
)
top_opts_dict = {
"with": [
decorator.make_decorator_spec()
for decorator in node.decorators
if not decorator.statically_defined
and decorator.inserted_by is None
]
}
# FlowDecorators can define their own top-level options. They are
# responsible for adding their own top-level options and values through
# the get_top_level_options() hook. See similar logic in runtime.py.
for deco in flow_decorators(self.flow):
top_opts_dict.update(deco.get_top_level_options())
top_level = list(dict_to_cli_options(top_opts_dict)) + [
"--quiet",
"--metadata=%s" % self.metadata.TYPE,
"--environment=%s" % self.environment.TYPE,
"--datastore=%s" % self.flow_datastore.TYPE,
"--datastore-root=%s" % self.flow_datastore.datastore_root,
"--event-logger=%s" % self.event_logger.TYPE,
"--monitor=%s" % self.monitor.TYPE,
"--no-pylint",
"--with=argo_workflows_internal:auto-emit-argo-events=%i"
% self.auto_emit_argo_events,
]
if node.name == "start":
# Execute `init` before any step of the workflow executes
task_id_params = "%s-params" % task_id
init = (
entrypoint
+ top_level
+ [
"init",
"--run-id %s" % run_id,
"--task-id %s" % task_id_params,
]
+ [
# Parameter names can be hyphenated, hence we use
# {{foo.bar['param_name']}}.
# https://argoproj.github.io/argo-events/tutorials/02-parameterization/
# http://masterminds.github.io/sprig/strings.html
"--%s=\\\"$(python -m metaflow.plugins.argo.param_val {{=toBase64(workflow.parameters['%s'])}})\\\""
% (parameter["name"], parameter["name"])
for parameter in self.parameters.values()
]
)
if self.tags:
init.extend("--tag %s" % tag for tag in self.tags)
# if the start step gets retried, we must be careful
# not to regenerate multiple parameters tasks. Hence,
# we check first if _parameters exists already.
exists = entrypoint + [
"dump",
"--max-value-size=0",
"%s/_parameters/%s" % (run_id, task_id_params),
]
step_cmds.extend(
[
"if ! %s >/dev/null 2>/dev/null; then %s; fi"
% (" ".join(exists), " ".join(init))
]
)
input_paths = "%s/_parameters/%s" % (run_id, task_id_params)
# Only for static joins and conditional_joins
elif (
self._is_conditional_join_node(node)
or self._many_in_funcs_all_conditional(node)
or self._is_conditional_skip_node(node)
) and not (
node.type == "join"
and self.graph[node.split_parents[-1]].type == "foreach"
):
# we need to pass in the set of conditional in_funcs to the pathspec generating script as in the case of split-switch skipping cases,
# non-conditional input-paths need to be ignored in favour of conditional ones when they have executed.
skippable_input_steps = ",".join(
[
in_func
for in_func in node.in_funcs
if self.graph[in_func].type == "split-switch"
]
)
input_paths = (
"$(python -m metaflow.plugins.argo.conditional_input_paths %s %s)"
% (input_paths, skippable_input_steps)
)
elif (
node.type == "join"
and self.graph[node.split_parents[-1]].type == "foreach"
):
# foreach-joins straight out of conditional branches are not yet supported
if self._is_conditional_join_node(node) and len(node.in_funcs) > 1:
raise ArgoWorkflowsException(
"Conditional steps inside a foreach that transition directly into a join step are not currently supported.\n"
"As a workaround, add a common step after the conditional steps %s "
"that will transition to a join."
% ", ".join("*%s*" % f for f in node.in_funcs)
)
# Set aggregated input-paths for a for-each join
foreach_step = next(
n for n in node.in_funcs if self.graph[n].is_inside_foreach
)
if not self.graph[node.split_parents[-1]].parallel_foreach:
input_paths = (
"$(python -m metaflow.plugins.argo.generate_input_paths %s {{workflow.creationTimestamp}} %s {{inputs.parameters.split-cardinality}})"
% (
foreach_step,
input_paths,
)
)
else:
# Handle @parallel where output from volume mount isn't accessible
input_paths = (
"$(python -m metaflow.plugins.argo.jobset_input_paths %s %s {{inputs.parameters.task-id-entropy}} {{inputs.parameters.num-parallel}})"
% (
run_id,
foreach_step,
)
)
# NOTE: input-paths might be extremely lengthy so we dump these to disk instead of passing them directly to the cmd
step_cmds.append("echo %s >> /tmp/mf-input-paths" % input_paths)
step = [
"step",
node.name,
"--run-id %s" % run_id,
"--task-id %s" % task_id,
"--retry-count %s" % retry_count,
"--max-user-code-retries %d" % user_code_retries,
"--input-paths-filename /tmp/mf-input-paths",
]
if node.parallel_step:
step.append(
"--split-index ${MF_CONTROL_INDEX:-$((MF_WORKER_REPLICA_INDEX + 1))}"
)
# This is needed for setting the value of the UBF context in the CLI.
step.append("--ubf-context $UBF_CONTEXT")
elif any(self.graph[n].type == "foreach" for n in node.in_funcs):
# Pass split-index to a foreach task
step.append("--split-index {{inputs.parameters.split-index}}")
if self.tags:
step.extend("--tag %s" % tag for tag in self.tags)
if self.namespace is not None:
step.append("--namespace=%s" % self.namespace)
step_cmds.extend([" ".join(entrypoint + top_level + step)])
cmd_str = "%s; c=$?; %s; exit $c" % (
" && ".join([init_cmds, bash_capture_logs(" && ".join(step_cmds))]),
BASH_SAVE_LOGS,
)
cmds = shlex.split('bash -c "%s"' % cmd_str)
# Resolve resource requirements.
resources = dict(
[deco for deco in node.decorators if deco.name == "kubernetes"][
0
].attributes
)
if (
resources["namespace"]
and resources["namespace"] != KUBERNETES_NAMESPACE
):
raise ArgoWorkflowsException(
"Multi-namespace Kubernetes execution of flows in Argo Workflows "
"is not currently supported. \nStep *%s* is trying to override "
"the default Kubernetes namespace *%s*."
% (node.name, KUBERNETES_NAMESPACE)
)
run_time_limit = [
deco for deco in node.decorators if deco.name == "kubernetes"
][0].run_time_limit
# Resolve @environment decorator. We set three classes of environment
# variables -
# (1) User-specified environment variables through @environment
# (2) Metaflow runtime specific environment variables
# (3) @kubernetes, @argo_workflows_internal bookkeeping environment
# variables
env = dict(
[deco for deco in node.decorators if deco.name == "environment"][
0
].attributes["vars"]
)
# Temporary passing of *some* environment variables. Do not rely on this
# mechanism as it will be removed in the near future
env.update(
{
k: v
for k, v in config_values()
if k.startswith("METAFLOW_CONDA_")
or k.startswith("METAFLOW_DEBUG_")
}
)
env.update(
{
**{
# These values are needed by Metaflow to set its internal
# state appropriately.
"METAFLOW_CODE_METADATA": self.code_package_metadata,
"METAFLOW_CODE_URL": self.code_package_url,
"METAFLOW_CODE_SHA": self.code_package_sha,
"METAFLOW_CODE_DS": self.flow_datastore.TYPE,
"METAFLOW_SERVICE_URL": SERVICE_INTERNAL_URL,
"METAFLOW_SERVICE_HEADERS": json.dumps(SERVICE_HEADERS),
"METAFLOW_USER": "argo-workflows",
"METAFLOW_DATASTORE_SYSROOT_S3": DATASTORE_SYSROOT_S3,
"METAFLOW_DATATOOLS_S3ROOT": DATATOOLS_S3ROOT,
"METAFLOW_DEFAULT_DATASTORE": self.flow_datastore.TYPE,
"METAFLOW_DEFAULT_METADATA": DEFAULT_METADATA,
"METAFLOW_CARD_S3ROOT": CARD_S3ROOT,
"METAFLOW_KUBERNETES_WORKLOAD": 1,
"METAFLOW_KUBERNETES_FETCH_EC2_METADATA": KUBERNETES_FETCH_EC2_METADATA,
"METAFLOW_RUNTIME_ENVIRONMENT": "kubernetes",
"METAFLOW_OWNER": self.username,
},
**{
# Configuration for Argo Events. Keep these in sync with the
# environment variables for @kubernetes decorator.
"METAFLOW_ARGO_EVENTS_EVENT": ARGO_EVENTS_EVENT,
"METAFLOW_ARGO_EVENTS_EVENT_BUS": ARGO_EVENTS_EVENT_BUS,
"METAFLOW_ARGO_EVENTS_EVENT_SOURCE": ARGO_EVENTS_EVENT_SOURCE,
"METAFLOW_ARGO_EVENTS_SERVICE_ACCOUNT": ARGO_EVENTS_SERVICE_ACCOUNT,
"METAFLOW_ARGO_EVENTS_WEBHOOK_URL": ARGO_EVENTS_INTERNAL_WEBHOOK_URL,
"METAFLOW_ARGO_EVENTS_WEBHOOK_AUTH": ARGO_EVENTS_WEBHOOK_AUTH,
},
**{
# Some optional values for bookkeeping
"METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0]),
"METAFLOW_FLOW_NAME": self.flow.name,
"METAFLOW_STEP_NAME": node.name,
"METAFLOW_RUN_ID": run_id,
# "METAFLOW_TASK_ID": task_id,
"METAFLOW_RETRY_COUNT": retry_count,
"METAFLOW_PRODUCTION_TOKEN": self.production_token,
"ARGO_WORKFLOW_TEMPLATE": self.name,
"ARGO_WORKFLOW_NAME": "{{workflow.name}}",
"ARGO_WORKFLOW_NAMESPACE": KUBERNETES_NAMESPACE,
},
**self.metadata.get_runtime_environment("argo-workflows"),
}
)
# add METAFLOW_S3_ENDPOINT_URL
env["METAFLOW_S3_ENDPOINT_URL"] = S3_ENDPOINT_URL
# support Metaflow sandboxes
env["METAFLOW_INIT_SCRIPT"] = KUBERNETES_SANDBOX_INIT_SCRIPT
env["METAFLOW_KUBERNETES_SANDBOX_INIT_SCRIPT"] = (
KUBERNETES_SANDBOX_INIT_SCRIPT
)
# support for @secret
env["METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE"] = DEFAULT_SECRETS_BACKEND_TYPE
env["METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"] = (
AWS_SECRETS_MANAGER_DEFAULT_REGION
)
env["METAFLOW_GCP_SECRET_MANAGER_PREFIX"] = GCP_SECRET_MANAGER_PREFIX
env["METAFLOW_AZURE_KEY_VAULT_PREFIX"] = AZURE_KEY_VAULT_PREFIX
# support for Azure
env["METAFLOW_AZURE_STORAGE_BLOB_SERVICE_ENDPOINT"] = (
AZURE_STORAGE_BLOB_SERVICE_ENDPOINT
)
env["METAFLOW_DATASTORE_SYSROOT_AZURE"] = DATASTORE_SYSROOT_AZURE
env["METAFLOW_CARD_AZUREROOT"] = CARD_AZUREROOT
env["METAFLOW_ARGO_WORKFLOWS_KUBERNETES_SECRETS"] = (
ARGO_WORKFLOWS_KUBERNETES_SECRETS
)
env["METAFLOW_ARGO_WORKFLOWS_ENV_VARS_TO_SKIP"] = (
ARGO_WORKFLOWS_ENV_VARS_TO_SKIP
)
# support for GCP
env["METAFLOW_DATASTORE_SYSROOT_GS"] = DATASTORE_SYSROOT_GS
env["METAFLOW_CARD_GSROOT"] = CARD_GSROOT
# Map Argo Events payload (if any) to environment variables
if self.triggers:
for event in self.triggers:
env[
"METAFLOW_ARGO_EVENT_PAYLOAD_%s_%s"
% (event["type"], event["sanitized_name"])
] = ("{{workflow.parameters.%s}}" % event["sanitized_name"])
# Map S3 upload headers to environment variables
if S3_SERVER_SIDE_ENCRYPTION is not None:
env["METAFLOW_S3_SERVER_SIDE_ENCRYPTION"] = S3_SERVER_SIDE_ENCRYPTION
metaflow_version = self.environment.get_environment_info()
metaflow_version["flow_name"] = self.graph.name
metaflow_version["production_token"] = self.production_token
env["METAFLOW_VERSION"] = json.dumps(metaflow_version)
# map config values
cfg_env = {
param["name"]: param["kv_name"] for param in self.config_parameters
}
if cfg_env:
env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
# Set the template inputs and outputs for passing state. Very simply,
# the container template takes in input-paths as input and outputs
# the task-id (which feeds in as input-paths to the subsequent task).
# In addition to that, if the parent of the node under consideration
# is a for-each node, then we take the split-index as an additional
# input. Analogously, if the node under consideration is a foreach
# node, then we emit split cardinality as an extra output. I would like
# to thank the designers of Argo Workflows for making this so
# straightforward! Things become a bit more complicated to support very
# wide foreaches where we have to resort to passing a root-input-path
# so that we can compute the task ids for each parent task of a for-each
# join task deterministically inside the join task without resorting to
# passing a rather long list of (albeit compressed) input paths.
inputs = []
# To set the input-paths as a parameter, we need to ensure that the node
# is not (a start node or a parallel join node). Start nodes will have no
# input paths and parallel join will derive input paths based on a
# formulaic approach.
if not (
node.name == "start"
or (node.type == "join" and self.graph[node.in_funcs[0]].parallel_step)
):
inputs.append(Parameter("input-paths"))
if any(self.graph[n].type == "foreach" for n in node.in_funcs):
# Fetch split-index from parent
inputs.append(Parameter("split-index"))
if (
node.type == "join"
and self.graph[node.split_parents[-1]].type == "foreach"
):
# @parallel join tasks require `num-parallel` and `task-id-entropy`
# to construct the input paths, so we pass them down as input parameters.
if self.graph[node.split_parents[-1]].parallel_foreach:
inputs.extend(
[Parameter("num-parallel"), Parameter("task-id-entropy")]
)
else:
# append these only for joins of foreaches, not static splits
inputs.append(Parameter("split-cardinality"))
# check if the node is a @parallel node.
elif node.parallel_step:
inputs.extend(
[
Parameter("num-parallel"),
Parameter("task-id-entropy"),
Parameter("jobset-name"),
Parameter("workerCount"),
]
)
# {{retries}} is only available if retryStrategy is specified in the template.
# Only add retryCount input parameter if total_retries > 0.
if total_retries > 0:
inputs.append(Parameter("retryCount"))
if node.is_inside_foreach and self.graph[node.out_funcs[0]].type == "join":
if any(
self.graph[parent].matching_join
== self.graph[node.out_funcs[0]].name
for parent in self.graph[node.out_funcs[0]].split_parents
if self.graph[parent].type == "foreach"
) and any(not self.graph[f].type == "foreach" for f in node.in_funcs):
# we need to propagate the split-index and root-input-path info for
# the last step inside a foreach for correctly joining nested
# foreaches
if not any(self.graph[n].type == "foreach" for n in node.in_funcs):
# Don't add duplicate split index parameters.
inputs.append(Parameter("split-index"))
inputs.append(Parameter("root-input-path"))
outputs = []
# @parallel steps will not have a task-id as an output parameter since task-ids
# are derived at runtime.
if not (node.name == "end" or node.parallel_step):
outputs = [Parameter("task-id").valueFrom({"path": "/mnt/out/task_id"})]
# If this step is a split-switch one, we need to output the switch step name
if node.type == "split-switch":
outputs.append(
Parameter("switch-step").valueFrom({"path": "/mnt/out/switch_step"})
)
if node.type == "foreach":
# Emit split cardinality from foreach task
outputs.append(
Parameter("num-splits").valueFrom({"path": "/mnt/out/splits"})
)
outputs.append(
Parameter("split-cardinality").valueFrom(
{"path": "/mnt/out/split_cardinality"}
)
)
if node.parallel_foreach:
outputs.extend(
[
Parameter("num-parallel").valueFrom(
{"path": "/mnt/out/num_parallel"}
),
Parameter("task-id-entropy").valueFrom(
{"path": "/mnt/out/task_id_entropy"}
),
]
)
# Outputs should be defined over here and not in the _dag_template for @parallel.
# It makes no sense to set env vars to None (shows up as "None" string)
# Also we skip some env vars (e.g. in case we want to pull them from KUBERNETES_SECRETS)
env = {
k: v
for k, v in env.items()
if v is not None
and k not in set(ARGO_WORKFLOWS_ENV_VARS_TO_SKIP.split(","))
}
# Tmpfs variables
use_tmpfs = resources["use_tmpfs"]
tmpfs_size = resources["tmpfs_size"]
tmpfs_path = resources["tmpfs_path"]
tmpfs_tempdir = resources["tmpfs_tempdir"]
# Set shared_memory to 0 if it isn't specified. This results
# in Kubernetes using its default value when the pod is created.
shared_memory = resources.get("shared_memory", 0)
port = resources.get("port", None)
if port:
port = int(port)
tmpfs_enabled = use_tmpfs or (tmpfs_size and not use_tmpfs)
if tmpfs_enabled and tmpfs_tempdir:
env["METAFLOW_TEMPDIR"] = tmpfs_path
qos_requests, qos_limits = qos_requests_and_limits(
resources["qos"],
resources["cpu"],
resources["memory"],
resources["disk"],
)
security_context = resources.get("security_context", None)
_security_context = {}
if security_context is not None and len(security_context) > 0:
_security_context = {
"security_context": kubernetes_sdk.V1SecurityContext(
**security_context
)
}
# Create a ContainerTemplate for this node. Ideally, we would have
# liked to inline this ContainerTemplate and avoid scanning the workflow
# twice, but due to issues with variable substitution, we will have to
# live with this routine.
if node.parallel_step:
jobset_name = "{{inputs.parameters.jobset-name}}"
jobset = KubernetesArgoJobSet(
kubernetes_sdk=kubernetes_sdk,
name=jobset_name,
flow_name=self.flow.name,
run_id=run_id,
step_name=self._sanitize(node.name),
task_id=task_id,
attempt=retry_count,
user=self.username,
subdomain=jobset_name,
command=cmds,
namespace=resources["namespace"],
image=resources["image"],
image_pull_policy=resources["image_pull_policy"],
image_pull_secrets=resources["image_pull_secrets"],
service_account=resources["service_account"],
secrets=(
[
k
for k in (
list(
[]
if not resources.get("secrets")
else (
[resources.get("secrets")]
if isinstance(resources.get("secrets"), str)
else resources.get("secrets")
)
)
+ KUBERNETES_SECRETS.split(",")
+ ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
)
if k
]
),
node_selector=resources.get("node_selector"),
cpu=str(resources["cpu"]),
memory=str(resources["memory"]),
disk=str(resources["disk"]),
gpu=resources["gpu"],
gpu_vendor=str(resources["gpu_vendor"]),
tolerations=resources["tolerations"],
use_tmpfs=use_tmpfs,
tmpfs_tempdir=tmpfs_tempdir,
tmpfs_size=tmpfs_size,
tmpfs_path=tmpfs_path,
timeout_in_seconds=run_time_limit,
persistent_volume_claims=resources["persistent_volume_claims"],
shared_memory=shared_memory,
port=port,
qos=resources["qos"],
security_context=security_context,
)
for k, v in env.items():
jobset.environment_variable(k, v)
# Set labels. Do not allow user-specified task labels to override internal ones.
#
# Explicitly add the task-id-hint label. This is important because this label
# is returned as an Output parameter of this step and is used subsequently as
# an input in the join step.
kubernetes_labels = {
"task_id_entropy": "{{inputs.parameters.task-id-entropy}}",
"num_parallel": "{{inputs.parameters.num-parallel}}",
"metaflow/argo-workflows-name": "{{workflow.name}}",
"workflows.argoproj.io/workflow": "{{workflow.name}}",
}
jobset.labels(
{
**resources["labels"],
**self._base_labels,
**kubernetes_labels,
}
)
jobset.environment_variable(
"MF_MASTER_ADDR", jobset.jobset_control_addr
)
jobset.environment_variable("MF_MASTER_PORT", str(port))
jobset.environment_variable(
"MF_WORLD_SIZE", "{{inputs.parameters.num-parallel}}"
)
# We need this task-id set so that all the nodes are aware of the control
# task's task-id. These "MF_" variables populate the `current.parallel` namedtuple
jobset.environment_variable(
"MF_PARALLEL_CONTROL_TASK_ID",
"control-{{inputs.parameters.task-id-entropy}}-0",
)
# for k, v in .items():
jobset.environment_variables_from_selectors(
{
"MF_WORKER_REPLICA_INDEX": "metadata.annotations['jobset.sigs.k8s.io/job-index']",
"JOBSET_RESTART_ATTEMPT": "metadata.annotations['jobset.sigs.k8s.io/restart-attempt']",
"METAFLOW_KUBERNETES_JOBSET_NAME": "metadata.annotations['jobset.sigs.k8s.io/jobset-name']",
"METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
"METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
"METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
"METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
"METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
"TASK_ID_SUFFIX": "metadata.annotations['jobset.sigs.k8s.io/job-index']",
}
)
# Set annotations. Do not allow user-specified task-specific annotations to override internal ones.
annotations = {
# setting annotations explicitly as they won't be
# passed down from WorkflowTemplate level
"metaflow/step_name": node.name,
"metaflow/attempt": str(retry_count),
"metaflow/run_id": run_id,
}
jobset.annotations(
{
**resources["annotations"],
**self._base_annotations,
**annotations,
}
)
jobset.control.replicas(1)
jobset.worker.replicas("{{=asInt(inputs.parameters.workerCount)}}")
jobset.control.environment_variable("UBF_CONTEXT", UBF_CONTROL)
jobset.worker.environment_variable("UBF_CONTEXT", UBF_TASK)
jobset.control.environment_variable("MF_CONTROL_INDEX", "0")
# `TASK_ID_PREFIX` needs to explicitly be `control` or `worker`
# because the join task uses a formulaic approach to infer the task-ids
jobset.control.environment_variable("TASK_ID_PREFIX", "control")
jobset.worker.environment_variable("TASK_ID_PREFIX", "worker")
yield (
Template(ArgoWorkflows._sanitize(node.name))
.resource(
"create",
jobset.dump(),
"status.terminalState == Completed",
"status.terminalState == Failed",
)
.inputs(Inputs().parameters(inputs))
.outputs(
Outputs().parameters(
[
Parameter("task-id-entropy").valueFrom(
{"jsonPath": "{.metadata.labels.task_id_entropy}"}
),
Parameter("num-parallel").valueFrom(
{"jsonPath": "{.metadata.labels.num_parallel}"}
),
]
)
)
.retry_strategy(
times=total_retries,
minutes_between_retries=minutes_between_retries,
)
)
else:
template_name = self._sanitize(node.name)
if self._is_recursive_node(node):
# The recursive template has the original step name,
# this becomes a template within the recursive ones 'steps'
template_name = self._sanitize("recursive-%s" % node.name)
yield (
Template(template_name)
# Set @timeout values
.active_deadline_seconds(run_time_limit)
# Set service account
.service_account_name(resources["service_account"])
# Configure template input
.inputs(Inputs().parameters(inputs))
# Configure template output
.outputs(Outputs().parameters(outputs))
# Fail fast!
.fail_fast()
# Set @retry/@catch values
.retry_strategy(
times=total_retries,
minutes_between_retries=minutes_between_retries,
)
.metadata(
ObjectMeta()
.annotation("metaflow/step_name", node.name)
# Unfortunately, we can't set the task_id since it is generated
# inside the pod. However, it can be inferred from the annotation
# set by argo-workflows - `workflows.argoproj.io/outputs` - refer
# the field 'task-id' in 'parameters'
# .annotation("metaflow/task_id", ...)
.annotation("metaflow/attempt", retry_count)
.annotations(resources["annotations"])
.labels(resources["labels"])
)
# Set emptyDir volume for state management
.empty_dir_volume("out")
# Set tmpfs emptyDir volume if enabled
.empty_dir_volume(
"tmpfs-ephemeral-volume",
medium="Memory",
size_limit=tmpfs_size if tmpfs_enabled else 0,
)
.empty_dir_volume("dhsm", medium="Memory", size_limit=shared_memory)
.pvc_volumes(resources.get("persistent_volume_claims"))
# Set node selectors
.node_selectors(resources.get("node_selector"))
# Set tolerations
.tolerations(resources.get("tolerations"))
# Set image pull secrets if present. We need to use pod_spec_patch due to Argo not supporting this on a template level.
.pod_spec_patch(
{
"imagePullSecrets": [
{"name": secret}
for secret in resources["image_pull_secrets"]
]
}
if resources["image_pull_secrets"]
else None
)
# Set container
.container(
# TODO: Unify the logic with kubernetes.py
# Important note - Unfortunately, V1Container uses snakecase while
# Argo Workflows uses camel. For most of the attributes, both cases
# are indistinguishable, but unfortunately, not for all - (
# env_from, value_from, etc.) - so we need to handle the conversion
# ourselves using to_camelcase. We need to be vigilant about
# resources attributes in particular where the keys maybe user
# defined.
to_camelcase(
kubernetes_sdk.V1Container(
name=self._sanitize(node.name),
command=cmds,
termination_message_policy="FallbackToLogsOnError",
ports=(
[
kubernetes_sdk.V1ContainerPort(
container_port=port
)
]
if port
else None
),
env=[
kubernetes_sdk.V1EnvVar(name=k, value=str(v))
for k, v in env.items()
]
# Add environment variables for book-keeping.
# https://argoproj.github.io/argo-workflows/fields/#fields_155
+ [
kubernetes_sdk.V1EnvVar(
name=k,
value_from=kubernetes_sdk.V1EnvVarSource(
field_ref=kubernetes_sdk.V1ObjectFieldSelector(
field_path=str(v)
)
),
)
for k, v in {
"METAFLOW_KUBERNETES_NAMESPACE": "metadata.namespace",
"METAFLOW_KUBERNETES_POD_NAMESPACE": "metadata.namespace",
"METAFLOW_KUBERNETES_POD_NAME": "metadata.name",
"METAFLOW_KUBERNETES_POD_ID": "metadata.uid",
"METAFLOW_KUBERNETES_SERVICE_ACCOUNT_NAME": "spec.serviceAccountName",
"METAFLOW_KUBERNETES_NODE_IP": "status.hostIP",
}.items()
],
image=resources["image"],
image_pull_policy=resources["image_pull_policy"],
resources=kubernetes_sdk.V1ResourceRequirements(
requests=qos_requests,
limits={
**qos_limits,
**{
"%s.com/gpu".lower()
% resources["gpu_vendor"]: str(
resources["gpu"]
)
for k in [0]
if resources["gpu"] is not None
},
},
),
# Configure secrets
env_from=[
kubernetes_sdk.V1EnvFromSource(
secret_ref=kubernetes_sdk.V1SecretEnvSource(
name=str(k),
# optional=True
)
)
for k in list(
[]
if not resources.get("secrets")
else (
[resources.get("secrets")]
if isinstance(resources.get("secrets"), str)
else resources.get("secrets")
)
)
+ KUBERNETES_SECRETS.split(",")
+ ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
if k
],
volume_mounts=[
# Assign a volume mount to pass state to the next task.
kubernetes_sdk.V1VolumeMount(
name="out", mount_path="/mnt/out"
)
]
# Support tmpfs.
+ (
[
kubernetes_sdk.V1VolumeMount(
name="tmpfs-ephemeral-volume",
mount_path=tmpfs_path,
)
]
if tmpfs_enabled
else []
)
# Support shared_memory
+ (
[
kubernetes_sdk.V1VolumeMount(
name="dhsm",
mount_path="/dev/shm",
)
]
if shared_memory
else []
)
# Support persistent volume claims.
+ (
[
kubernetes_sdk.V1VolumeMount(
name=claim, mount_path=path
)
for claim, path in resources.get(
"persistent_volume_claims"
).items()
]
if resources.get("persistent_volume_claims")
is not None
else []
),
**_security_context,
).to_dict()
)
)
)
# Return the daemon container templates enabled for this workflow (currently only the heartbeat daemon).
def _daemon_templates(self):
templates = []
if self.enable_heartbeat_daemon:
templates.append(self._heartbeat_daemon_template())
return templates
# Return lifecycle hooks for workflow execution notifications.
def _lifecycle_hooks(self):
hooks = []
if self.notify_on_error:
hooks.append(self._slack_error_template())
hooks.append(self._pager_duty_alert_template())
hooks.append(self._incident_io_alert_template())
if self.notify_on_success:
hooks.append(self._slack_success_template())
hooks.append(self._pager_duty_change_template())
hooks.append(self._incident_io_change_template())
exit_hook_decos = self.flow._flow_decorators.get("exit_hook", [])
for deco in exit_hook_decos:
hooks.extend(self._lifecycle_hook_from_deco(deco))
# Clean up None values from templates.
hooks = list(filter(None, hooks))
if hooks:
hooks.append(
ExitHookHack(
url=(
self.notify_slack_webhook_url
or "https://events.pagerduty.com/v2/enqueue"
)
)
)
return hooks
def _lifecycle_hook_from_deco(self, deco):
    """
    Translate an `exit_hook` flow decorator into Argo container hooks.

    One `ContainerHook` is created for every function name in
    `deco.success_hooks` (flagged `on_success`), followed by one for every
    name in `deco.error_hooks` (flagged `on_error`). Each hook's container
    runs `metaflow.plugins.exit_hook.exit_hook_script` for that function.

    Parameters
    ----------
    deco
        Decorator object exposing `success_hooks`, `error_hooks` and
        `attributes["options"]`. A truthy `image` entry in the options
        overrides the container image.

    Returns
    -------
    list
        The generated `ContainerHook` objects, success hooks first.
    """
    from kubernetes import client as kubernetes_sdk

    # Borrow image, secrets and service account from the start step's
    # @kubernetes decorator: that image is known to be pullable from within
    # the cluster and might already contain the required libraries.
    entry_step = [step for step in self.graph if step.name == "start"][0]
    kube_deco = [d for d in entry_step.decorators if d.name == "kubernetes"][0]
    resources = dict(kube_deco.attributes)
    kube_defaults = dict(kube_deco.defaults)

    run_id_template = "argo-{{workflow.name}}"
    env_info = self.environment.get_environment_info()
    env_info["flow_name"] = self.graph.name
    env_info["production_token"] = self.production_token

    candidate_env = {
        # Values Metaflow needs to set up its internal state.
        "METAFLOW_CODE_URL": self.code_package_url,
        "METAFLOW_CODE_SHA": self.code_package_sha,
        "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
        "METAFLOW_SERVICE_URL": SERVICE_INTERNAL_URL,
        "METAFLOW_SERVICE_HEADERS": json.dumps(SERVICE_HEADERS),
        "METAFLOW_USER": "argo-workflows",
        "METAFLOW_S3_ENDPOINT_URL": S3_ENDPOINT_URL,
        "METAFLOW_DEFAULT_DATASTORE": self.flow_datastore.TYPE,
        "METAFLOW_DEFAULT_METADATA": DEFAULT_METADATA,
        "METAFLOW_OWNER": self.username,
        # Run pathspec consumed by the hook script.
        "RUN_PATHSPEC": f"{self.graph.name}/{run_id_template}",
        # Support Metaflow sandboxes.
        "METAFLOW_INIT_SCRIPT": KUBERNETES_SANDBOX_INIT_SCRIPT,
        "METAFLOW_WORKFLOW_NAME": "{{workflow.name}}",
        "METAFLOW_WORKFLOW_NAMESPACE": "{{workflow.namespace}}",
    }
    # Drop unset values and anything the deployment asked to skip.
    skipped_names = set(ARGO_WORKFLOWS_ENV_VARS_TO_SKIP.split(","))
    env = {}
    for env_name, env_value in candidate_env.items():
        if env_value is None or env_name in skipped_names:
            continue
        env[env_name] = env_value

    def _cmd(fn_name):
        # argv for a `bash -c` invocation that prepares logging, runs the
        # package commands and finally executes the hook function.
        log_env_setup = export_mflog_env_vars(
            datastore_type=self.flow_datastore.TYPE,
            stdout_path="$PWD/.logs/mflog_stdout",
            stderr_path="$PWD/.logs/mflog_stderr",
            flow_name=self.flow.name,
            run_id=run_id_template,
            step_name=f"_hook_{fn_name}",
            task_id="1",
            retry_count="0",
        )
        bootstrap = [
            # For sandboxes, a custom script (passed in as an env var) must
            # run before anything else.
            '${METAFLOW_INIT_SCRIPT:+eval \\"${METAFLOW_INIT_SCRIPT}\\"}',
            "mkdir -p $PWD/.logs",
            log_env_setup,
        ]
        # The last package command is dropped and replaced by the
        # hook-specific 'starting' line below.
        package_cmds = self.environment.get_package_commands(
            self.code_package_url, self.flow_datastore.TYPE
        )[:-1]
        hook_cmds = [
            f"mflog 'Lifecycle hook {fn_name} is starting.'",
            f"python -m metaflow.plugins.exit_hook.exit_hook_script {env_info['script']} {fn_name} $RUN_PATHSPEC",
        ]
        script = " && ".join(bootstrap + package_cmds + hook_cmds)
        return shlex.split('bash -c "%s"' % script)

    def _container(cmds):
        # Secrets may be unset, a single name or a collection of names.
        declared = resources.get("secrets")
        if not declared:
            declared = []
        elif isinstance(declared, str):
            declared = [declared]
        secret_names = [
            name
            for name in list(declared)
            + KUBERNETES_SECRETS.split(",")
            + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
            if name
        ]
        image = deco.attributes["options"].get("image", None) or resources["image"]
        spec = kubernetes_sdk.V1Container(
            name="main",
            command=cmds,
            image=image,
            env=[
                kubernetes_sdk.V1EnvVar(name=env_name, value=str(env_value))
                for env_name, env_value in env.items()
            ],
            env_from=[
                kubernetes_sdk.V1EnvFromSource(
                    secret_ref=kubernetes_sdk.V1SecretEnvSource(
                        name=str(name),
                        # optional=True
                    )
                )
                for name in secret_names
            ],
            resources=kubernetes_sdk.V1ResourceRequirements(
                requests={
                    "cpu": str(kube_defaults["cpu"]),
                    "memory": "%sM" % str(kube_defaults["memory"]),
                }
            ),
        )
        return to_camelcase(spec.to_dict())

    # Success hooks first, then error hooks.
    hooks = [
        ContainerHook(
            name=f"success-{fn_name.replace('_', '-')}",
            container=_container(cmds=_cmd(fn_name)),
            service_account_name=resources["service_account"],
            on_success=True,
        )
        for fn_name in deco.success_hooks
    ]
    hooks += [
        ContainerHook(
            name=f"error-{fn_name.replace('_', '-')}",
            service_account_name=resources["service_account"],
            container=_container(cmds=_cmd(fn_name)),
            on_error=True,
        )
        for fn_name in deco.error_hooks
    ]
    return hooks
def _exit_hook_templates(self):
templates = []
if self.enable_error_msg_capture:
templates.extend(self._error_msg_capture_hook_templates())
return templates
def _error_msg_capture_hook_templates(self):
    """
    Build the two templates that implement error-message capture.

    The first, "error-msg-capture-hook", is a container template that runs
    `metaflow.plugins.argo.capture_error`, exports its output as
    METAFLOW_ARGO_ERROR, prints the error's "message" field and finally
    evals METAFLOW_ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT when that variable is
    non-empty. The second, "capture-error-hook-fn-preflight", is a steps
    template that invokes the first only when the workflow status is not
    `Succeeded`.

    Returns
    -------
    list
        `[container_template, preflight_template]`.
    """
    from kubernetes import client as kubernetes_sdk

    # Borrow image, secrets and service account from the start step's
    # @kubernetes decorator: that image is known to be pullable from within
    # the cluster and might already contain the required libraries.
    entry_step = [step for step in self.graph if step.name == "start"][0]
    kube_deco = [d for d in entry_step.decorators if d.name == "kubernetes"][0]
    resources = dict(kube_deco.attributes)

    run_id_template = "argo-{{workflow.name}}"
    env_info = self.environment.get_environment_info()
    env_info["flow_name"] = self.graph.name
    env_info["production_token"] = self.production_token

    log_env_setup = export_mflog_env_vars(
        datastore_type=self.flow_datastore.TYPE,
        stdout_path="$PWD/.logs/mflog_stdout",
        stderr_path="$PWD/.logs/mflog_stderr",
        flow_name=self.flow.name,
        run_id=run_id_template,
        step_name="_run_capture_error",
        task_id="1",
        retry_count="0",
    )
    bootstrap = [
        # For sandboxes, a custom script (passed in as an env var) must run
        # before anything else.
        '${METAFLOW_INIT_SCRIPT:+eval \\"${METAFLOW_INIT_SCRIPT}\\"}',
        "mkdir -p $PWD/.logs",
        log_env_setup,
    ]
    # The last package command is dropped and replaced by the hook-specific
    # 'starting' line below.
    # FIXME: this can be brittle.
    package_cmds = self.environment.get_package_commands(
        self.code_package_url,
        self.flow_datastore.TYPE,
        self.code_package_metadata,
    )[:-1]
    capture_cmds = [
        "mflog 'Error capture hook is starting.'",
        "argo_error=$(python -m 'metaflow.plugins.argo.capture_error')",
        "export METAFLOW_ARGO_ERROR=$argo_error",
        """python -c 'import json, os; error_obj=os.getenv(\\"METAFLOW_ARGO_ERROR\\");data=json.loads(error_obj); print(data[\\"message\\"])'""",
        'if [ -n \\"${METAFLOW_ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT}\\" ]; then eval \\"${METAFLOW_ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT}\\"; fi',
    ]
    script = " && ".join(bootstrap + package_cmds + capture_cmds)
    # TODO: Also capture the first failed task id
    cmds = shlex.split('bash -c "%s"' % script)

    candidate_env = {
        # Values Metaflow needs to set up its internal state.
        "METAFLOW_CODE_METADATA": self.code_package_metadata,
        "METAFLOW_CODE_URL": self.code_package_url,
        "METAFLOW_CODE_SHA": self.code_package_sha,
        "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
        "METAFLOW_SERVICE_URL": SERVICE_INTERNAL_URL,
        "METAFLOW_SERVICE_HEADERS": json.dumps(SERVICE_HEADERS),
        "METAFLOW_USER": "argo-workflows",
        "METAFLOW_S3_ENDPOINT_URL": S3_ENDPOINT_URL,
        "METAFLOW_DEFAULT_DATASTORE": self.flow_datastore.TYPE,
        "METAFLOW_DEFAULT_METADATA": DEFAULT_METADATA,
        "METAFLOW_OWNER": self.username,
        # Support Metaflow sandboxes.
        "METAFLOW_INIT_SCRIPT": KUBERNETES_SANDBOX_INIT_SCRIPT,
        "METAFLOW_ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT": (
            ARGO_WORKFLOWS_CAPTURE_ERROR_SCRIPT
        ),
        "METAFLOW_WORKFLOW_NAME": "{{workflow.name}}",
        "METAFLOW_WORKFLOW_NAMESPACE": "{{workflow.namespace}}",
        "METAFLOW_ARGO_WORKFLOW_FAILURES": "{{workflow.failures}}",
    }
    # Drop unset values and anything the deployment asked to skip.
    skipped_names = set(ARGO_WORKFLOWS_ENV_VARS_TO_SKIP.split(","))
    env = {}
    for env_name, env_value in candidate_env.items():
        if env_value is None or env_name in skipped_names:
            continue
        env[env_name] = env_value

    # Secrets may be unset, a single name or a collection of names.
    declared = resources.get("secrets")
    if not declared:
        declared = []
    elif isinstance(declared, str):
        declared = [declared]
    secret_names = [
        name
        for name in list(declared)
        + KUBERNETES_SECRETS.split(",")
        + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
        if name
    ]

    capture_template = (
        Template("error-msg-capture-hook")
        .service_account_name(resources["service_account"])
        .container(
            to_camelcase(
                kubernetes_sdk.V1Container(
                    name="main",
                    command=cmds,
                    image=resources["image"],
                    env=[
                        kubernetes_sdk.V1EnvVar(
                            name=env_name, value=str(env_value)
                        )
                        for env_name, env_value in env.items()
                    ],
                    env_from=[
                        kubernetes_sdk.V1EnvFromSource(
                            secret_ref=kubernetes_sdk.V1SecretEnvSource(
                                name=str(name),
                                # optional=True
                            )
                        )
                        for name in secret_names
                    ],
                    resources=kubernetes_sdk.V1ResourceRequirements(
                        # NOTE: base resources are kept to a minimum to
                        # save on running costs. This slows down startup,
                        # which a base image with the required
                        # dependencies pre-installed fully alleviates.
                        requests={
                            "cpu": "200m",
                            "memory": "100Mi",
                        },
                        limits={
                            "cpu": "200m",
                            "memory": "500Mi",
                        },
                    ),
                ).to_dict()
            )
        )
    )
    preflight_template = Template("capture-error-hook-fn-preflight").steps(
        [
            WorkflowStep()
            .name("capture-error-hook-fn-preflight")
            .template("error-msg-capture-hook")
            .when("{{workflow.status}} != Succeeded")
        ]
    )
    return [capture_template, preflight_template]
def _pager_duty_alert_template(self):
# https://developer.pagerduty.com/docs/ZG9jOjExMDI5NTgx-send-an-alert-event
if self.notify_pager_duty_integration_key is None:
return None
return HttpExitHook(
name="notify-pager-duty-on-error",
method="POST",
url="https://events.pagerduty.com/v2/enqueue",
headers={"Content-Type": "application/json"},
body=json.dumps(
{
"event_action": "trigger",
"routing_key": self.notify_pager_duty_integration_key,
# "dedup_key": self.flow.name, # TODO: Do we need deduplication?
"payload": {
"source": "{{workflow.name}}",
"severity": "info",
"summary": "Metaflow run %s/argo-{{workflow.name}} failed!"
% self.flow.name,
"custom_details": {
"Flow": self.flow.name,
"Run ID": "argo-{{workflow.name}}",
},
},
"links": self._pager_duty_notification_links(),
}
),
on_error=True,
)
def _incident_io_alert_template(self):
if self.notify_incident_io_api_key is None:
return None
if self.incident_io_alert_source_config_id is None:
raise MetaflowException(
"Creating alerts for errors requires a alert source config ID."
)
ui_links = self._incident_io_ui_urls_for_run()
return HttpExitHook(
name="notify-incident-io-on-error",
method="POST",
url=(
"https://api.incident.io/v2/alert_events/http/%s"
% self.incident_io_alert_source_config_id
),
headers={
"Content-Type": "application/json",
"Authorization": "Bearer %s" % self.notify_incident_io_api_key,
},
body=json.dumps(
{
"idempotency_key": "argo-{{workflow.name}}", # use run id to deduplicate alerts.
"status": "firing",
"title": "Flow %s has failed." % self.flow.name,
"description": "Metaflow run {run_pathspec} failed!{urls}".format(
run_pathspec="%s/argo-{{workflow.name}}" % self.flow.name,
urls=(
"\n\nSee details for the run at:\n\n"
+ "\n\n".join(ui_links)
if ui_links
else ""
),
),
"source_url": (
"%s/%s/%s"
% (
UI_URL.rstrip("/"),
self.flow.name,
"argo-{{workflow.name}}",
)
if UI_URL
else None
),
"metadata": {
**(self.incident_io_metadata or {}),
**{
"run_status": "failed",
"flow_name": self.flow.name,
"run_id": "argo-{{workflow.name}}",
},
},
}
),
on_error=True,
)
def _incident_io_change_template(self):
if self.notify_incident_io_api_key is None:
return None
if self.incident_io_alert_source_config_id is None:
raise MetaflowException(
"Creating alerts for successes requires an alert source config ID."
)
ui_links = self._incident_io_ui_urls_for_run()
return HttpExitHook(
name="notify-incident-io-on-success",
method="POST",
url=(
"https://api.incident.io/v2/alert_events/http/%s"
% self.incident_io_alert_source_config_id
),
headers={
"Content-Type": "application/json",
"Authorization": "Bearer %s" % self.notify_incident_io_api_key,
},
body=json.dumps(
{
"idempotency_key": "argo-{{workflow.name}}", # use run id to deduplicate alerts.
"status": "firing",
"title": "Flow %s has succeeded." % self.flow.name,
"description": "Metaflow run {run_pathspec} succeeded!{urls}".format(
run_pathspec="%s/argo-{{workflow.name}}" % self.flow.name,
urls=(
"\n\nSee details for the run at:\n\n"
+ "\n\n".join(ui_links)
if ui_links
else ""
),
),
"source_url": (
"%s/%s/%s"
% (
UI_URL.rstrip("/"),
self.flow.name,
"argo-{{workflow.name}}",
)
if UI_URL
else None
),
"metadata": {
**(self.incident_io_metadata or {}),
**{
"run_status": "succeeded",
"flow_name": self.flow.name,
"run_id": "argo-{{workflow.name}}",
},
},
}
),
on_success=True,
)
def _incident_io_ui_urls_for_run(self):
    """
    Return Markdown links to the run in whichever UIs are configured.

    The Metaflow UI link (when `UI_URL` is set) comes first, followed by the
    Argo UI link (when `ARGO_WORKFLOWS_UI_URL` is set). The list is empty
    when neither is configured.
    """
    markdown_links = []
    if UI_URL:
        markdown_links.append(
            "[Metaflow UI](%s/%s/%s)"
            % (UI_URL.rstrip("/"), self.flow.name, "argo-{{workflow.name}}")
        )
    if ARGO_WORKFLOWS_UI_URL:
        markdown_links.append(
            "[Argo UI](%s/workflows/%s/%s)"
            % (
                ARGO_WORKFLOWS_UI_URL.rstrip("/"),
                "{{workflow.namespace}}",
                "{{workflow.name}}",
            )
        )
    return markdown_links
def _pager_duty_change_template(self):
# https://developer.pagerduty.com/docs/ZG9jOjExMDI5NTgy-send-a-change-event
if self.notify_pager_duty_integration_key is None:
return None
return HttpExitHook(
name="notify-pager-duty-on-success",
method="POST",
url="https://events.pagerduty.com/v2/change/enqueue",
headers={"Content-Type": "application/json"},
body=json.dumps(
{
"routing_key": self.notify_pager_duty_integration_key,
"payload": {
"summary": "Metaflow run %s/argo-{{workflow.name}} Succeeded"
% self.flow.name,
"source": "{{workflow.name}}",
"custom_details": {
"Flow": self.flow.name,
"Run ID": "argo-{{workflow.name}}",
},
},
"links": self._pager_duty_notification_links(),
}
),
on_success=True,
)
def _pager_duty_notification_links(self):
    """
    Return the `links` entries attached to PagerDuty events.

    Each entry is a dict with `href` and `text`. The Metaflow UI entry (when
    `UI_URL` is set) comes first, followed by the Argo UI entry (when
    `ARGO_WORKFLOWS_UI_URL` is set). The list is empty when neither is
    configured.
    """
    entries = []
    if UI_URL:
        metaflow_href = "%s/%s/%s" % (
            UI_URL.rstrip("/"),
            self.flow.name,
            "argo-{{workflow.name}}",
        )
        entries.append({"href": metaflow_href, "text": "Metaflow UI"})
    if ARGO_WORKFLOWS_UI_URL:
        argo_href = "%s/workflows/%s/%s" % (
            ARGO_WORKFLOWS_UI_URL.rstrip("/"),
            "{{workflow.namespace}}",
            "{{workflow.name}}",
        )
        entries.append({"href": argo_href, "text": "Argo UI"})
    return entries
def _get_slack_blocks(self, message):
    """
    Compose the Slack Block Kit payload for a run notification.

    The result always starts with an "Environment details" section, a
    context block linking to the run in the Metaflow UI, and a divider. The
    section carries project and branch fields when `current.project_name` is
    set. A final section holding `message` is appended when `message` is
    truthy.

    Parameters
    ----------
    message
        Optional mrkdwn text for the trailing section.

    Returns
    -------
    list
        The block dictionaries, in display order.
    """
    ui_link = "%s/%s/argo-{{workflow.name}}" % (UI_URL.rstrip("/"), self.flow.name)

    environment_details_block = {
        "type": "section",
        "text": {"type": "mrkdwn", "text": "Environment details"},
    }
    if getattr(current, "project_name", None):
        # Add @project metadata when available.
        environment_details_block["fields"] = [
            {"type": "mrkdwn", "text": "*Project:* %s" % current.project_name},
            {
                "type": "mrkdwn",
                "text": "*Project Branch:* %s" % current.branch_name,
            },
        ]

    link_block = {
        "type": "context",
        "elements": [
            {
                "type": "mrkdwn",
                "text": " :information_source: *<%s>*" % ui_link,
            }
        ],
    }
    blocks = [environment_details_block, link_block, {"type": "divider"}]
    if message:
        blocks.append(
            {"type": "section", "text": {"type": "mrkdwn", "text": message}}
        )
    return blocks
def _slack_error_template(self):
if self.notify_slack_webhook_url is None:
return None
message = (
":rotating_light: _%s/argo-{{workflow.name}}_ failed!" % self.flow.name
)
payload = {"text": message}
if UI_URL:
blocks = self._get_slack_blocks(message)
payload = {"text": message, "blocks": blocks}
return HttpExitHook(
name="notify-slack-on-error",
method="POST",
url=self.notify_slack_webhook_url,
body=json.dumps(payload),
on_error=True,
)
def _slack_success_template(self):
if self.notify_slack_webhook_url is None:
return None
message = (
":white_check_mark: _%s/argo-{{workflow.name}}_ succeeded!" % self.flow.name
)
payload = {"text": message}
if UI_URL:
blocks = self._get_slack_blocks(message)
payload = {"text": message, "blocks": blocks}
return HttpExitHook(
name="notify-slack-on-success",
method="POST",
url=self.notify_slack_webhook_url,
body=json.dumps(payload),
on_success=True,
)
def _heartbeat_daemon_template(self):
    """
    Build the Argo daemon template that runs `argo-workflows heartbeat`
    for the run.

    The container reuses the image, secrets and service account of the start
    step's @kubernetes decorator. Its command sets up mflog, runs the
    environment's package commands (all but the last) and then launches the
    heartbeat command with the flow's top-level options and tags.

    Returns
    -------
    DaemonTemplate
        The "heartbeat-daemon" template.
    """
    # Use all the affordances available to _parameters task
    executable = self.environment.executable("_parameters")
    run_id = "argo-{{workflow.name}}"
    script_name = os.path.basename(sys.argv[0])
    entrypoint = [executable, script_name]
    # FlowDecorators can define their own top-level options. These might
    # affect run level information so it is important to pass these to the
    # heartbeat process as well, as it might be the first task to register
    # a run.
    top_opts_dict = {}
    for deco in flow_decorators(self.flow):
        top_opts_dict.update(deco.get_top_level_options())

    top_level = list(dict_to_cli_options(top_opts_dict)) + [
        "--quiet",
        "--metadata=%s" % self.metadata.TYPE,
        "--environment=%s" % self.environment.TYPE,
        "--datastore=%s" % self.flow_datastore.TYPE,
        "--datastore-root=%s" % self.flow_datastore.datastore_root,
        "--event-logger=%s" % self.event_logger.TYPE,
        "--monitor=%s" % self.monitor.TYPE,
        "--no-pylint",
        "--with=argo_workflows_internal:auto-emit-argo-events=%i"
        % self.auto_emit_argo_events,
    ]
    heartbeat_cmds = "{entrypoint} {top_level} argo-workflows heartbeat --run_id {run_id} {tags}".format(
        entrypoint=" ".join(entrypoint),
        top_level=" ".join(top_level) if top_level else "",
        run_id=run_id,
        tags=" ".join(["--tag %s" % t for t in self.tags]) if self.tags else "",
    )

    # TODO: we do not really need MFLOG logging for the daemon at the moment,
    #       but might be good for the future. Consider if we can do without
    #       this setup.
    # Configure log capture.
    mflog_expr = export_mflog_env_vars(
        datastore_type=self.flow_datastore.TYPE,
        stdout_path="$PWD/.logs/mflog_stdout",
        stderr_path="$PWD/.logs/mflog_stderr",
        flow_name=self.flow.name,
        run_id=run_id,
        step_name="_run_heartbeat_daemon",
        task_id="1",
        retry_count="0",
    )
    # TODO: Can the init be trimmed down?
    # Can we do without get_package_commands fetching the whole code package?
    init_cmds = " && ".join(
        [
            # For supporting sandboxes, ensure that a custom script is
            # executed before anything else is executed. The script is
            # passed in as an env var.
            '${METAFLOW_INIT_SCRIPT:+eval \\"${METAFLOW_INIT_SCRIPT}\\"}',
            "mkdir -p $PWD/.logs",
            mflog_expr,
        ]
        + self.environment.get_package_commands(
            self.code_package_url,
            self.flow_datastore.TYPE,
        )[:-1]
        # Replace the line 'Task in starting'
        # FIXME: this can be brittle.
        + ["mflog 'Heartbeat daemon is starting.'"]
    )

    cmd_str = " && ".join([init_cmds, heartbeat_cmds])
    cmds = shlex.split('bash -c "%s"' % cmd_str)

    # Env required for sending heartbeats to the metadata service, nothing
    # extra. Prod token / runtime info is required to correctly register
    # flow branches.
    env = {
        # These values are needed by Metaflow to set its internal state
        # appropriately.
        "METAFLOW_CODE_METADATA": self.code_package_metadata,
        "METAFLOW_CODE_URL": self.code_package_url,
        "METAFLOW_CODE_SHA": self.code_package_sha,
        "METAFLOW_CODE_DS": self.flow_datastore.TYPE,
        "METAFLOW_SERVICE_URL": SERVICE_INTERNAL_URL,
        "METAFLOW_SERVICE_HEADERS": json.dumps(SERVICE_HEADERS),
        "METAFLOW_USER": "argo-workflows",
        "METAFLOW_S3_ENDPOINT_URL": S3_ENDPOINT_URL,
        "METAFLOW_DATASTORE_SYSROOT_S3": DATASTORE_SYSROOT_S3,
        "METAFLOW_DATATOOLS_S3ROOT": DATATOOLS_S3ROOT,
        "METAFLOW_DEFAULT_DATASTORE": self.flow_datastore.TYPE,
        "METAFLOW_DEFAULT_METADATA": DEFAULT_METADATA,
        "METAFLOW_CARD_S3ROOT": CARD_S3ROOT,
        "METAFLOW_KUBERNETES_WORKLOAD": 1,
        "METAFLOW_KUBERNETES_FETCH_EC2_METADATA": KUBERNETES_FETCH_EC2_METADATA,
        "METAFLOW_RUNTIME_ENVIRONMENT": "kubernetes",
        "METAFLOW_OWNER": self.username,
        # Used in identity resolving. This affects system tags.
        "METAFLOW_PRODUCTION_TOKEN": self.production_token,
    }
    # support Metaflow sandboxes
    env["METAFLOW_INIT_SCRIPT"] = KUBERNETES_SANDBOX_INIT_SCRIPT

    # cleanup env values
    env = {
        k: v
        for k, v in env.items()
        if v is not None
        and k not in set(ARGO_WORKFLOWS_ENV_VARS_TO_SKIP.split(","))
    }

    # We want to grab the base image used by the start step, as this is
    # known to be pullable from within the cluster, and it might contain the
    # required libraries, allowing us to start up faster.
    start_step = next(step for step in self.flow if step.name == "start")
    resources = dict(
        [deco for deco in start_step.decorators if deco.name == "kubernetes"][
            0
        ].attributes
    )
    from kubernetes import client as kubernetes_sdk

    return (
        DaemonTemplate("heartbeat-daemon")
        # NOTE: Even though a retry strategy does not work for Argo daemon
        # containers, this has the side-effect of protecting the exit hooks
        # of the workflow from failing in case the daemon container errors
        # out.
        .retry_strategy(10, 1)
        .service_account_name(resources["service_account"])
        .container(
            # Serialize the V1Container to a dict *before* camel-casing, as
            # every other container spec in this file does. V1Container
            # emits snake_case keys (env_from, ...) while Argo Workflows
            # expects camelCase, so the conversion has to run on the dict
            # rather than on the SDK object.
            to_camelcase(
                kubernetes_sdk.V1Container(
                    name="main",
                    # TODO: Make the image configurable
                    image=resources["image"],
                    command=cmds,
                    env=[
                        kubernetes_sdk.V1EnvVar(name=k, value=str(v))
                        for k, v in env.items()
                    ],
                    env_from=[
                        kubernetes_sdk.V1EnvFromSource(
                            secret_ref=kubernetes_sdk.V1SecretEnvSource(
                                name=str(k),
                                # optional=True
                            )
                        )
                        for k in list(
                            []
                            if not resources.get("secrets")
                            else (
                                [resources.get("secrets")]
                                if isinstance(resources.get("secrets"), str)
                                else resources.get("secrets")
                            )
                        )
                        + KUBERNETES_SECRETS.split(",")
                        + ARGO_WORKFLOWS_KUBERNETES_SECRETS.split(",")
                        if k
                    ],
                    resources=kubernetes_sdk.V1ResourceRequirements(
                        # NOTE: base resources for this are kept to a
                        # minimum to save on running costs. This has an
                        # adverse effect on startup time for the daemon,
                        # which can be completely alleviated by using a base
                        # image that has the required dependencies
                        # pre-installed.
                        requests={
                            "cpu": "200m",
                            "memory": "100Mi",
                        },
                        limits={
                            "cpu": "200m",
                            "memory": "100Mi",
                        },
                    ),
                ).to_dict()
            )
        )
    )
def _compile_sensor(self):
# This method compiles a Metaflow @trigger decorator into Argo Events Sensor.
#
# Event payload is assumed as -
# ----------------------------------------------------------------------
# | name | name of the event |
# | payload | |
# | parameter name... | parameter value |
# | parameter name... | parameter value |
# | parameter name... | parameter value |
# | parameter name... | parameter value |
# ----------------------------------------------------------------------
#
#
#
# At the moment, every event-triggered workflow template has a dedicated
# sensor (which can potentially be a bit wasteful in scenarios with high
# volume of workflows and low volume of events) - introducing a many-to-one
# sensor-to-workflow-template solution is completely in the realm of
# possibilities (modulo consistency and transactional guarantees).
#
# This implementation side-steps the more prominent/popular usage of event
# sensors where the sensor is responsible for submitting the workflow object
# directly. Instead we construct the equivalent behavior of `argo submit
# --from` to reference an already submitted workflow template. This ensures
# that Metaflow generated Kubernetes objects can be easily reasoned about.
#
# At the moment, Metaflow configures for webhook and NATS event sources. If you
# are interested in the HA story for either - please follow this link
# https://argoproj.github.io/argo-events/eventsources/ha/.
#
# There is some potential for confusion between Metaflow concepts and Argo
# Events concepts, particularly for event names. Argo Events EventSource
# define an event name which is different than the Metaflow event name - think
# of Argo Events name as a type of event (conceptually like topics in Kafka)
# while Metaflow event names are a field within the Argo Event.
#
#
# At the moment, there is parity between the labels and annotations for
# workflow templates and sensors - that may or may not be the case in the
# future.
#
# Unfortunately, there doesn't seem to be a way to create a sensor filter
# where one (or more) fields across multiple events have the same value.
# Imagine a scenario where we want to trigger a flow iff both the dependent
# events agree on the same date field. Unfortunately, there isn't any way in
# Argo Events (as of apr'23) to ensure that.
# Nothing to do here - let's short circuit and exit.
if not self.triggers:
return {}
# Ensure proper configuration is available for Argo Events
if ARGO_EVENTS_EVENT is None:
raise ArgoWorkflowsException(
"An Argo Event name hasn't been configured for your deployment yet. "
"Please see this article for more details on event names - "
"https://argoproj.github.io/argo-events/eventsources/naming/. "
"It is very likely that all events for your deployment share the "
"same name. You can configure it by executing "
"`metaflow configure kubernetes` or setting METAFLOW_ARGO_EVENTS_EVENT "
"in your configuration. If in doubt, reach out for support at "
"http://chat.metaflow.org"
)
# Unfortunately argo events requires knowledge of event source today.
# Hopefully, some day this requirement can be removed and events can be truly
# impervious to their source and destination.
if ARGO_EVENTS_EVENT_SOURCE is None:
raise ArgoWorkflowsException(
"An Argo Event Source name hasn't been configured for your deployment "
"yet. Please see this article for more details on event names - "
"https://argoproj.github.io/argo-events/eventsources/naming/. "
"You can configure it by executing `metaflow configure kubernetes` or "
"setting METAFLOW_ARGO_EVENTS_EVENT_SOURCE in your configuration. If "
"in doubt, reach out for support at http://chat.metaflow.org"
)
# Service accounts are a hard requirement since we utilize the
# argoWorkflow trigger for resource sensors today.
if ARGO_EVENTS_SERVICE_ACCOUNT is None:
raise ArgoWorkflowsException(
"An Argo Event service account hasn't been configured for your "
"deployment yet. Please see this article for more details on event "
"names - https://argoproj.github.io/argo-events/service-accounts/. "
"You can configure it by executing `metaflow configure kubernetes` or "
"setting METAFLOW_ARGO_EVENTS_SERVICE_ACCOUNT in your configuration. "
"If in doubt, reach out for support at http://chat.metaflow.org"
)
try:
# Kubernetes is a soft dependency for generating Argo objects.
# We can very well remove this dependency for Argo with the downside of
# adding a bunch more json bloat classes (looking at you... V1Container)
from kubernetes import client as kubernetes_sdk
except (NameError, ImportError):
raise MetaflowException(
"Could not import Python package 'kubernetes'. Install kubernetes "
"sdk (https://pypi.org/project/kubernetes/) first."
)
return (
Sensor()
.metadata(
# Sensor metadata.
ObjectMeta()
.name(ArgoWorkflows._sensor_name(self.name))
.namespace(ARGO_EVENTS_SENSOR_NAMESPACE)
.labels(self._base_labels)
.label("app.kubernetes.io/name", "metaflow-sensor")
.annotations(self._base_annotations)
)
.spec(
SensorSpec().template(
# Sensor template.
SensorTemplate()
.metadata(
ObjectMeta()
.label("app.kubernetes.io/name", "metaflow-sensor")
.label("app.kubernetes.io/part-of", "metaflow")
.annotations(self._base_annotations)
)
.container(
# Run sensor in guaranteed QoS. The sensor isn't doing a lot
# of work so we roll with minimal resource allocation. It is
# likely that in subsequent releases we will aggressively lower
# sensor resources to pack more of them on a single node.
to_camelcase(
kubernetes_sdk.V1Container(
name="main",
resources=kubernetes_sdk.V1ResourceRequirements(
requests={
"cpu": "100m",
"memory": "250Mi",
},
limits={
"cpu": "100m",
"memory": "250Mi",
},
),
).to_dict()
)
)
.service_account_name(ARGO_EVENTS_SERVICE_ACCOUNT)
# TODO (savin): Handle bypassing docker image rate limit errors.
)
# Set sensor replica to 1 for now.
# TODO (savin): Allow for multiple replicas for HA.
.replicas(1)
# TODO: Support revision history limit to manage old deployments
# .revision_history_limit(...)
.event_bus_name(ARGO_EVENTS_EVENT_BUS)
# Workflow trigger.
.trigger(
Trigger().template(
TriggerTemplate(self.name)
# Trigger a deployed workflow template
.k8s_trigger(
StandardK8STrigger()
.source(
{
"resource": {
"apiVersion": "argoproj.io/v1alpha1",
"kind": "Workflow",
"metadata": {
"generateName": "%s-" % self.name,
"namespace": KUBERNETES_NAMESPACE,
# Useful to paint the UI
"annotations": {
"metaflow/triggered_by": json.dumps(
[
{
key: trigger.get(key)
for key in ["name", "type"]
}
for trigger in self.triggers
]
)
},
},
"spec": {
"arguments": {
"parameters": [
Parameter(parameter["name"])
.value(parameter["value"])
.to_json()
for parameter in self.parameters.values()
]
# Also consume event data
+ [
Parameter(event["sanitized_name"])
.value(json.dumps(None))
.to_json()
for event in self.triggers
]
},
"workflowTemplateRef": {
"name": self.name,
},
},
}
}
)
.parameters(
[
y
for x in list(
list(
TriggerParameter()
.src(
dependency_name=event["sanitized_name"],
# Technically, we don't need to create
# a payload carry-on and can stuff
# everything within the body.
# NOTE: We need the conditional logic in order to successfully fall back to the default value
# when the event payload does not contain a key for a parameter.
# NOTE: Keys might contain dashes, so use the safer 'get' for fetching the value
data_template='{{ if (hasKey $.Input.body.payload "%s") }}%s{{- else -}}{{ (fail "use-default-instead") }}{{- end -}}'
% (
v,
(
'{{- $pv:=(get $.Input.body.payload "%s") -}}{{ if kindIs "string" $pv }}{{- $pv | toRawJson -}}{{- else -}}{{ $pv | toRawJson | toRawJson }}{{- end -}}'
% v
if self.parameters[
parameter_name
]["type"]
== "JSON"
else '{{- (get $.Input.body.payload "%s" | toRawJson) -}}'
% v
),
),
# Unfortunately the sensor needs to
# record the default values for
# the parameters - there doesn't seem
# to be any way for us to skip
value=self.parameters[parameter_name][
"value"
],
)
.dest(
# this undocumented (mis?)feature in
# argo-events allows us to reference
# parameters by name rather than index
"spec.arguments.parameters.#(name=%s).value"
% parameter_name
)
for parameter_name, v in event.get(
"parameters", {}
).items()
)
for event in self.triggers
)
for y in x
]
+ [
# Map event payload to parameters for current
TriggerParameter()
.src(
dependency_name=event["sanitized_name"],
data_key="body.payload",
value=json.dumps(None),
)
.dest(
"spec.arguments.parameters.#(name=%s).value"
% event["sanitized_name"]
)
for event in self.triggers
]
)
# Reset trigger conditions ever so often by wiping
# away event tracking history on a schedule.
# @trigger(options={"reset_at": {"cron": , "timezone": }})
# timezone is IANA standard, e.g. America/Los_Angeles
# TODO: Introduce "end_of_day", "end_of_hour" ..
).conditions_reset(
cron=self.trigger_options.get("reset_at", {}).get("cron"),
timezone=self.trigger_options.get("reset_at", {}).get(
"timezone"
),
)
)
)
# Event dependencies. As of Mar' 23, Argo Events docs suggest using
# Jetstream event bus rather than NATS streaming bus since the latter
# doesn't support multiple combos of the same event name and event
# source name.
.dependencies(
# Event dependencies don't entertain dots
EventDependency(event["sanitized_name"]).event_name(
ARGO_EVENTS_EVENT
)
# TODO: Alternatively fetch this from @trigger config options
.event_source_name(ARGO_EVENTS_EVENT_SOURCE).filters(
# Ensure that event name matches and all required parameter
# fields are present in the payload. There is a possibility of
# dependency on an event where none of the fields are required.
# At the moment, this event is required but the restriction
# can be removed if needed.
EventDependencyFilter().exprs(
[
{
"expr": "name == '%s'" % event["name"],
"fields": [
{"name": "name", "path": "body.payload.name"}
],
}
]
+ [
{
"expr": "true == true", # field name is present
"fields": [
{
"name": "field",
"path": "body.payload.%s" % v,
}
],
}
for parameter_name, v in event.get(
"parameters", {}
).items()
# only for required parameters
if self.parameters[parameter_name]["is_required"]
]
+ [
{
"expr": "field == '%s'" % v, # trigger_on_finish
"fields": [
{
"name": "field",
"path": "body.payload.%s" % filter_key,
}
],
}
for filter_key, v in event.get("filters", {}).items()
if v
]
)
)
for event in self.triggers
)
)
)
def list_to_prose(self, items, singular):
    """
    Render `items` as an emphasized, comma/"and"-joined phrase.

    Every item is wrapped in asterisks. The phrase is suffixed with
    `singular` when there is exactly one item and with `singular + "s"`
    when there are two or more. An empty `items` yields an empty string.
    """
    emphasized = ["*%s*" % entry for entry in items]
    plural = singular + "s"
    if not emphasized:
        return ""
    if len(emphasized) == 1:
        return "%s %s" % (emphasized[0], singular)
    # Two or more entries: comma-join everything but the last, then "and".
    leading = ", ".join(emphasized[:-1])
    return "%s and %s %s" % (leading, emphasized[-1], plural)
# Helper classes to assist with JSON-foo. This can very well be replaced with an explicit
# dependency on argo-workflows Python SDK if this method turns out to be painful.
# TODO: Autogenerate them, maybe?
class WorkflowTemplate(object):
    """
    Chainable builder for a `WorkflowTemplate` JSON payload.

    The payload is an auto-vivifying nested dict pre-populated with the
    `apiVersion` and `kind` fields; setters return `self`.
    """

    # https://argoproj.github.io/argo-workflows/fields/#workflowtemplate

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()
        self.payload.update(
            {"apiVersion": "argoproj.io/v1alpha1", "kind": "WorkflowTemplate"}
        )

    def _set(self, key, value):
        # Store one top-level field and hand back the builder for chaining.
        self.payload[key] = value
        return self

    def metadata(self, object_meta):
        return self._set("metadata", object_meta.to_json())

    def spec(self, workflow_spec):
        return self._set("spec", workflow_spec.to_json())

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class ObjectMeta(object):
    """
    Chainable builder for an `ObjectMeta` JSON payload.

    The payload is an auto-vivifying nested dict; every setter returns
    `self` so calls can be chained.
    """

    # https://argoproj.github.io/argo-workflows/fields/#objectmeta

    def __init__(self):
        tree = lambda: defaultdict(tree)
        self.payload = tree()

    def annotation(self, key, value):
        # Single annotation; the value is always stored as a string.
        self.payload["annotations"][key] = str(value)
        return self

    def annotations(self, annotations):
        # Merge a mapping of annotations. `None` is treated as an empty
        # mapping, mirroring `labels()`, so optional annotation sets can be
        # passed straight through without raising TypeError.
        if "annotations" not in self.payload:
            self.payload["annotations"] = {}
        self.payload["annotations"].update(annotations or {})
        return self

    def generate_name(self, generate_name):
        self.payload["generateName"] = generate_name
        return self

    def label(self, key, value):
        # Single label; the value is always stored as a string.
        self.payload["labels"][key] = str(value)
        return self

    def labels(self, labels):
        # Merge a mapping of labels; `None` is treated as an empty mapping.
        if "labels" not in self.payload:
            self.payload["labels"] = {}
        self.payload["labels"].update(labels or {})
        return self

    def name(self, name):
        self.payload["name"] = name
        return self

    def namespace(self, namespace):
        self.payload["namespace"] = namespace
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.to_json(), indent=4)
class WorkflowStep(object):
    """
    Chainable builder for a single workflow step payload.

    Scalar fields are coerced with `str()` before being stored.
    """

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def name(self, name):
        return self._set("name", str(name))

    def template(self, template):
        return self._set("template", str(template))

    def arguments(self, arguments):
        return self._set("arguments", arguments.to_json())

    def when(self, condition):
        return self._set("when", str(condition))

    def step(self, expression):
        # Note: despite the method name, this populates the "expression" key.
        return self._set("expression", str(expression))

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.to_json(), indent=4)
class WorkflowSpec(object):
    """
    Chainable builder for a `WorkflowSpec` JSON payload.

    Every setter writes one workflow-level property into `self.payload`
    and returns `self`.
    """

    # https://argoproj.github.io/argo-workflows/fields/#workflowspec
    # This object sets all Workflow level properties.

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def active_deadline_seconds(self, active_deadline_seconds):
        # Overall duration of a workflow in seconds
        if active_deadline_seconds is None:
            return self
        return self._set("activeDeadlineSeconds", int(active_deadline_seconds))

    def automount_service_account_token(self, mount=True):
        return self._set("automountServiceAccountToken", mount)

    def arguments(self, arguments):
        return self._set("arguments", arguments.to_json())

    def archive_logs(self, archive_logs=True):
        return self._set("archiveLogs", archive_logs)

    def entrypoint(self, entrypoint):
        return self._set("entrypoint", entrypoint)

    def onExit(self, on_exit_template):
        # Falsy values (None, "") leave the payload untouched.
        if not on_exit_template:
            return self
        return self._set("onExit", on_exit_template)

    def parallelism(self, parallelism):
        # Set parallelism at Workflow level
        return self._set("parallelism", int(parallelism))

    def pod_metadata(self, metadata):
        return self._set("podMetadata", metadata.to_json())

    def priority(self, priority):
        if priority is None:
            return self
        return self._set("priority", int(priority))

    def workflow_metadata(self, workflow_metadata):
        return self._set("workflowMetadata", workflow_metadata.to_json())

    def service_account_name(self, service_account_name):
        # https://argoproj.github.io/argo-workflows/workflow-rbac/
        return self._set("serviceAccountName", service_account_name)

    def templates(self, templates):
        # Appends to any templates accumulated by earlier calls.
        collected = self.payload.setdefault("templates", [])
        collected.extend(template.to_json() for template in templates)
        return self

    def hooks(self, hooks):
        # https://argoproj.github.io/argo-workflows/fields/#lifecyclehook
        registered = self.payload.setdefault("hooks", {})
        for hook_name, hook in hooks.items():
            registered[hook_name] = hook.to_json()
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.to_json(), indent=4)
class Metadata(object):
    """
    Chainable builder for a `Metadata` JSON payload (annotations, labels
    and `labelsFrom` expressions).

    The payload is an auto-vivifying nested dict; every setter returns
    `self` so calls can be chained.
    """

    # https://argoproj.github.io/argo-workflows/fields/#metadata

    def __init__(self):
        tree = lambda: defaultdict(tree)
        self.payload = tree()

    def annotation(self, key, value):
        # Single annotation; the value is always stored as a string.
        self.payload["annotations"][key] = str(value)
        return self

    def annotations(self, annotations):
        # Merge a mapping of annotations. `None` is treated as an empty
        # mapping, mirroring `labels()`, instead of raising TypeError.
        if "annotations" not in self.payload:
            self.payload["annotations"] = {}
        self.payload["annotations"].update(annotations or {})
        return self

    def label(self, key, value):
        # Single label; the value is always stored as a string.
        self.payload["labels"][key] = str(value)
        return self

    def labels(self, labels):
        # Merge a mapping of labels; `None` is treated as an empty mapping.
        if "labels" not in self.payload:
            self.payload["labels"] = {}
        self.payload["labels"].update(labels or {})
        return self

    def labels_from(self, labels_from):
        # Only available in workflow_metadata
        # https://github.com/argoproj/argo-workflows/blob/master/examples/label-value-from-workflow.yaml
        if "labelsFrom" not in self.payload:
            self.payload["labelsFrom"] = {}
        # Each entry is wrapped as {"expression": <value>}.
        for k, v in labels_from.items():
            self.payload["labelsFrom"].update({k: {"expression": v}})
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.to_json(), indent=4)
class DaemonTemplate(object):
    """
    Chainable builder for a template payload that has `daemon` set to True.

    The template name is kept both on the instance and in the payload.
    """

    def __init__(self, name):
        def _nested():
            return defaultdict(_nested)

        self.name = name
        self.payload = _nested()
        self.payload.update({"daemon": True, "name": name})

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def container(self, container):
        return self._set("container", container)

    def service_account_name(self, service_account_name):
        return self._set("serviceAccountName", service_account_name)

    def retry_strategy(self, times, minutes_between_retries):
        # A non-positive retry count leaves the payload without a strategy.
        if not times > 0:
            return self
        backoff = {"duration": "%sm" % minutes_between_retries}
        return self._set(
            "retryStrategy",
            {"retryPolicy": "Always", "limit": times, "backoff": backoff},
        )

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class Template(object):
    """
    Chainable builder for a `Template` JSON payload.

    The payload is an auto-vivifying nested dict seeded with the template
    name; every setter returns `self`.
    """

    # https://argoproj.github.io/argo-workflows/fields/#template

    def __init__(self, name):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()
        self.payload["name"] = name

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def active_deadline_seconds(self, active_deadline_seconds):
        # Overall duration of a pod in seconds, only obeyed for container templates
        # Used for implementing @timeout.
        return self._set("activeDeadlineSeconds", int(active_deadline_seconds))

    def dag(self, dag_template):
        return self._set("dag", dag_template.to_json())

    def steps(self, steps):
        # The payload's "steps" entry is a list of lists: each call adds one
        # inner list holding the serialized form of every incoming step.
        groups = self.payload.setdefault("steps", [])
        groups.append([step.to_json() for step in steps])
        return self

    def container(self, container):
        # Luckily this can simply be V1Container and we are spared from writing more
        # boilerplate - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1Container.md.
        return self._set("container", container)

    def http(self, http):
        return self._set("http", http.to_json())

    def inputs(self, inputs):
        return self._set("inputs", inputs.to_json())

    def outputs(self, outputs):
        return self._set("outputs", outputs.to_json())

    def fail_fast(self, fail_fast=True):
        # https://github.com/argoproj/argo-workflows/issues/1442
        return self._set("failFast", fail_fast)

    def metadata(self, metadata):
        return self._set("metadata", metadata.to_json())

    def service_account_name(self, service_account_name):
        return self._set("serviceAccountName", service_account_name)

    def retry_strategy(self, times, minutes_between_retries):
        # A non-positive retry count leaves the payload without a strategy.
        if not times > 0:
            return self
        backoff = {"duration": "%sm" % minutes_between_retries}
        return self._set(
            "retryStrategy",
            {"retryPolicy": "Always", "limit": times, "backoff": backoff},
        )

    def empty_dir_volume(self, name, medium=None, size_limit=None):
        """
        Append an emptyDir volume entry to the template's volumes.

        Parameters:
        -----------
        name: str
            name for the volume
        medium: str (optional)
            storage medium of the emptyDir; omitted from the entry when falsy
        size_limit: int (optional)
            sizeLimit (in MiB) for the volume; omitted from the entry when
            falsy, and a value of exactly 0 skips adding the volume at all
        """
        # Do not add volume if size is zero. Enables conditional chaining.
        if size_limit == 0:
            return self
        # Attach an emptyDir volume
        # https://argoproj.github.io/argo-workflows/empty-dir/
        volumes = self.payload.setdefault("volumes", [])
        empty_dir = {}
        if size_limit:
            # Add default unit as ours differs from Kubernetes default.
            empty_dir["sizeLimit"] = "{}Mi".format(size_limit)
        if medium:
            empty_dir["medium"] = medium
        volumes.append({"name": name, "emptyDir": empty_dir})
        return self

    def pvc_volumes(self, pvcs=None):
        """
        Append one persistentVolumeClaim volume entry per claim.

        Parameters:
        -----------
        pvcs: Optional[Dict]
            a dictionary of pvc's and the paths they should be mounted to. e.g.
            {"pv-claim-1": "/mnt/path1", "pv-claim-2": "/mnt/path2"}
            Only the keys are used here; `None` is a no-op.
        """
        if pvcs is None:
            return self
        volumes = self.payload.setdefault("volumes", [])
        volumes.extend(
            {"name": claim, "persistentVolumeClaim": {"claimName": claim}}
            for claim in pvcs.keys()
        )
        return self

    def pod_spec_patch(self, pod_spec_patch=None):
        # The patch is stored as a JSON-encoded string; None is a no-op.
        if pod_spec_patch is None:
            return self
        return self._set("podSpecPatch", json.dumps(pod_spec_patch))

    def node_selectors(self, node_selectors):
        # The "nodeSelector" key is always materialized, even when there is
        # nothing to merge into it.
        selector = self.payload.setdefault("nodeSelector", {})
        if node_selectors:
            selector.update(node_selectors)
        return self

    def tolerations(self, tolerations):
        return self._set("tolerations", tolerations)

    def to_json(self):
        return self.payload

    def resource(self, action, manifest, success_criteria, failure_criteria):
        # Replaces any previously configured resource block wholesale.
        return self._set(
            "resource",
            {
                "action": action,
                "setOwnerReference": True,
                "successCondition": success_criteria,
                "failureCondition": failure_criteria,
                "manifest": manifest,
            },
        )

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class Inputs(object):
    """Chainable builder for an `Inputs` JSON payload (a list of parameters)."""

    # https://argoproj.github.io/argo-workflows/fields/#inputs

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def parameters(self, parameters):
        # Appends to any parameters accumulated by earlier calls.
        collected = self.payload.setdefault("parameters", [])
        collected.extend(parameter.to_json() for parameter in parameters)
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class Outputs(object):
    """Chainable builder for an `Outputs` JSON payload (a list of parameters)."""

    # https://argoproj.github.io/argo-workflows/fields/#outputs

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def parameters(self, parameters):
        # Appends to any parameters accumulated by earlier calls.
        collected = self.payload.setdefault("parameters", [])
        collected.extend(parameter.to_json() for parameter in parameters)
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class Parameter(object):
    """
    Chainable builder for a `Parameter` JSON payload, seeded with its name.

    Values are stored exactly as given; no coercion is applied.
    """

    # https://argoproj.github.io/argo-workflows/fields/#parameter

    def __init__(self, name):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()
        self.payload["name"] = name

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def value(self, value):
        return self._set("value", value)

    def default(self, value):
        return self._set("default", value)

    def valueFrom(self, value_from):
        return self._set("valueFrom", value_from)

    def description(self, description):
        return self._set("description", description)

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class DAGTemplate(object):
    """Chainable builder for a `DAGTemplate` JSON payload."""

    # https://argoproj.github.io/argo-workflows/fields/#dagtemplate

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def fail_fast(self, fail_fast=True):
        # https://github.com/argoproj/argo-workflows/issues/1442
        self.payload["failFast"] = fail_fast
        return self

    def tasks(self, tasks):
        # Appends to any tasks accumulated by earlier calls.
        collected = self.payload.setdefault("tasks", [])
        collected.extend(task.to_json() for task in tasks)
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class DAGTask(object):
    """Chainable builder for a `DAGTask` JSON payload, seeded with its name."""

    # https://argoproj.github.io/argo-workflows/fields/#dagtask

    def __init__(self, name):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()
        self.payload["name"] = name

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def arguments(self, arguments):
        return self._set("arguments", arguments.to_json())

    def dependencies(self, dependencies):
        return self._set("dependencies", dependencies)

    def depends(self, depends: str):
        return self._set("depends", depends)

    def template(self, template):
        # Template reference
        return self._set("template", template)

    def inline(self, template):
        # We could have inlined the template here but
        # https://github.com/argoproj/argo-workflows/issues/7432 prevents us for now.
        return self._set("inline", template.to_json())

    def when(self, when: str):
        return self._set("when", when)

    def with_param(self, with_param):
        return self._set("withParam", with_param)

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class Arguments(object):
    """Chainable builder for an `Arguments` JSON payload (a list of parameters)."""

    # https://argoproj.github.io/argo-workflows/fields/#arguments

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def parameters(self, parameters):
        # Appends to any parameters accumulated by earlier calls.
        collected = self.payload.setdefault("parameters", [])
        collected.extend(parameter.to_json() for parameter in parameters)
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class Sensor(object):
    """
    Chainable builder for a `Sensor` JSON payload.

    The payload is pre-populated with the `apiVersion` and `kind` fields.
    """

    # https://github.com/argoproj/argo-events/blob/master/api/sensor.md#argoproj.io/v1alpha1.Sensor

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()
        self.payload.update(
            {"apiVersion": "argoproj.io/v1alpha1", "kind": "Sensor"}
        )

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def metadata(self, object_meta):
        return self._set("metadata", object_meta.to_json())

    def spec(self, sensor_spec):
        return self._set("spec", sensor_spec.to_json())

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class SensorSpec(object):
    """Chainable builder for a `SensorSpec` JSON payload."""

    # https://github.com/argoproj/argo-events/blob/master/api/sensor.md#argoproj.io/v1alpha1.SensorSpec

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def replicas(self, replicas=1):
        # TODO: Make number of deployment replicas configurable.
        self.payload["replicas"] = int(replicas)
        return self

    def template(self, sensor_template):
        self.payload["template"] = sensor_template.to_json()
        return self

    def trigger(self, trigger):
        # Each call contributes one entry to the "triggers" list.
        registered = self.payload.setdefault("triggers", [])
        registered.append(trigger.to_json())
        return self

    def dependencies(self, dependencies):
        # Appends to any dependencies accumulated by earlier calls.
        registered = self.payload.setdefault("dependencies", [])
        registered.extend(dependency.to_json() for dependency in dependencies)
        return self

    def event_bus_name(self, event_bus_name):
        self.payload["eventBusName"] = event_bus_name
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.to_json(), indent=4)
class SensorTemplate(object):
    """Chainable builder for a sensor `Template` JSON payload."""

    # https://github.com/argoproj/argo-events/blob/master/api/sensor.md#argoproj.io/v1alpha1.Template

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def service_account_name(self, service_account_name):
        return self._set("serviceAccountName", service_account_name)

    def metadata(self, object_meta):
        return self._set("metadata", object_meta.to_json())

    def container(self, container):
        # Luckily this can simply be V1Container and we are spared from writing more
        # boilerplate - https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1Container.md.
        return self._set("container", container)

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.to_json(), indent=4)
class EventDependency(object):
    """
    Chainable builder for an `EventDependency` JSON payload, seeded with
    the dependency name.

    The payload is an auto-vivifying nested dict; every setter returns
    `self` so calls can be chained.
    """

    # https://github.com/argoproj/argo-events/blob/master/api/sensor.md#argoproj.io/v1alpha1.EventDependency

    def __init__(self, name):
        tree = lambda: defaultdict(tree)
        self.payload = tree()
        self.payload["name"] = name

    def event_source_name(self, event_source_name):
        self.payload["eventSourceName"] = event_source_name
        return self

    def event_name(self, event_name):
        self.payload["eventName"] = event_name
        return self

    def filters(self, event_dependency_filter):
        self.payload["filters"] = event_dependency_filter.to_json()
        return self

    def transform(self, event_dependency_transformer=None):
        # Falsy transformers leave the payload untouched.
        if event_dependency_transformer:
            self.payload["transform"] = event_dependency_transformer
        return self

    def filters_logical_operator(self, logical_operator):
        # The Argo Events API models this field as a plain string. Accept a
        # raw string as well as a builder-style object exposing `to_json()`;
        # previously a string argument raised AttributeError.
        serializer = getattr(logical_operator, "to_json", None)
        self.payload["filtersLogicalOperator"] = (
            serializer() if callable(serializer) else logical_operator
        )
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.to_json(), indent=4)
class EventDependencyFilter(object):
    """Chainable builder for an `EventDependencyFilter` JSON payload."""

    # https://github.com/argoproj/argo-events/blob/master/api/sensor.md#argoproj.io/v1alpha1.EventDependencyFilter

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def exprs(self, exprs):
        # The given list is stored as-is (not copied).
        return self._set("exprs", exprs)

    def context(self, event_context):
        return self._set("context", event_context)

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.to_json(), indent=4)
class Trigger(object):
    """Chainable builder for a `Trigger` JSON payload."""

    # https://github.com/argoproj/argo-events/blob/master/api/sensor.md#argoproj.io/v1alpha1.Trigger

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def template(self, trigger_template):
        self.payload["template"] = trigger_template.to_json()
        return self

    def parameters(self, trigger_parameters):
        # Appends to any parameters accumulated by earlier calls.
        collected = self.payload.setdefault("parameters", [])
        collected.extend(entry.to_json() for entry in trigger_parameters)
        return self

    def policy(self, trigger_policy):
        self.payload["policy"] = trigger_policy.to_json()
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.to_json(), indent=4)
class TriggerTemplate(object):
    """Chainable builder for a `TriggerTemplate` JSON payload, seeded with its name."""

    # https://github.com/argoproj/argo-events/blob/master/api/sensor.md#argoproj.io/v1alpha1.TriggerTemplate

    def __init__(self, name):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()
        self.payload["name"] = name

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def k8s_trigger(self, k8s_trigger):
        return self._set("k8s", k8s_trigger.to_json())

    def argo_workflow_trigger(self, argo_workflow_trigger):
        return self._set("argoWorkflow", argo_workflow_trigger.to_json())

    def conditions_reset(self, cron, timezone):
        # Without a cron expression no reset condition is recorded at all.
        if not cron:
            return self
        by_time = {"cron": cron, "timezone": timezone}
        return self._set("conditionsReset", [{"byTime": by_time}])

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class ArgoWorkflowTrigger(object):
    """
    Chainable builder for an `ArgoWorkflowTrigger` JSON payload.

    The payload is pre-populated with `operation`, `group`, `version` and
    `resource` fields.
    """

    # https://github.com/argoproj/argo-events/blob/master/api/sensor.md#argoproj.io/v1alpha1.ArgoWorkflowTrigger

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()
        self.payload.update(
            {
                "operation": "submit",
                "group": "argoproj.io",
                "version": "v1alpha1",
                "resource": "workflows",
            }
        )

    def source(self, source):
        self.payload["source"] = source
        return self

    def parameters(self, trigger_parameters):
        # Appends to any parameters accumulated by earlier calls.
        collected = self.payload.setdefault("parameters", [])
        collected.extend(entry.to_json() for entry in trigger_parameters)
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class TriggerParameter(object):
    """Chainable builder for a `TriggerParameter` JSON payload (`src` and `dest`)."""

    # https://github.com/argoproj/argo-events/blob/master/api/sensor.md#argoproj.io/v1alpha1.TriggerParameter

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()

    def src(self, dependency_name, value, data_key=None, data_template=None):
        # `dataKey` and `dataTemplate` are always emitted, even when None.
        source = {
            "dependencyName": dependency_name,
            "dataKey": data_key,
            "dataTemplate": data_template,
            "value": value,
        }
        # explicitly set it to false to ensure proper deserialization
        source["useRawData"] = False
        self.payload["src"] = source
        return self

    def dest(self, dest):
        self.payload["dest"] = dest
        return self

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
class StandardK8STrigger(object):
    """
    Chainable builder for a `StandardK8STrigger` JSON payload.

    The `operation` field starts out as "create" and can be overridden.
    """

    # https://pkg.go.dev/github.com/argoproj/argo-events/pkg/apis/sensor/v1alpha1#StandardK8STrigger

    def __init__(self):
        def _nested():
            return defaultdict(_nested)

        self.payload = _nested()
        self.payload["operation"] = "create"

    def _set(self, key, value):
        # Store one field and return the builder so calls chain.
        self.payload[key] = value
        return self

    def operation(self, operation):
        return self._set("operation", operation)

    def group(self, group):
        return self._set("group", group)

    def version(self, version):
        return self._set("version", version)

    def resource(self, resource):
        return self._set("resource", resource)

    def namespace(self, namespace):
        return self._set("namespace", namespace)

    def source(self, source):
        return self._set("source", source)

    def parameters(self, trigger_parameters):
        # Appends to any parameters accumulated by earlier calls.
        collected = self.payload.setdefault("parameters", [])
        collected.extend(entry.to_json() for entry in trigger_parameters)
        return self

    def live_object(self, live_object=True):
        return self._set("liveObject", live_object)

    def patch_strategy(self, patch_strategy):
        return self._set("patchStrategy", patch_strategy)

    def to_json(self):
        return self.payload

    def __str__(self):
        return json.dumps(self.payload, indent=4)
================================================
FILE: metaflow/plugins/argo/argo_workflows_cli.py
================================================
import base64
import json
import platform
import re
import sys
from hashlib import sha1
from time import sleep
from metaflow import JSONType, Run, current, decorators, parameters
from metaflow._vendor import click
from metaflow.exception import (
MetaflowException,
MetaflowInternalError,
MetaflowNotFound,
)
from metaflow.metaflow_config import (
ARGO_WORKFLOWS_UI_URL,
FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
KUBERNETES_NAMESPACE,
SERVICE_VERSION_CHECK,
UI_URL,
)
from metaflow.package import MetaflowPackage
# TODO: Move production_token to utils
from metaflow.plugins.aws.step_functions.production_token import (
load_token,
new_token,
store_token,
)
from metaflow.plugins.environment_decorator import EnvironmentDecorator
from metaflow.plugins.kubernetes.kubernetes_decorator import KubernetesDecorator
from metaflow.tagging_util import validate_tags
from metaflow.util import get_username, to_bytes, to_unicode, version_parse
from .argo_workflows import ArgoWorkflows, ArgoWorkflowsException
# Metaflow version string used as a cut-over marker for workflow naming.
# NOTE(review): presumably the first version that applies the shorter Argo
# Workflows name-length limit (the `argo_workflows` group keeps a v1 name
# for versions <=2.16) - confirm against the name-resolution helpers.
NEW_ARGO_NAMELENGTH_METAFLOW_VERSION = "2.17"
# Accepts names that start with a lowercase letter and, when longer than one
# character, continue with lowercase letters, digits, dots or dashes and end
# with a lowercase letter or digit.
VALID_NAME = re.compile(r"^[a-z]([a-z0-9\.\-]*[a-z0-9])?$")
# Maps a decorator name to an error-message template; each template has a
# single `%s` placeholder positioned where the step name appears.
unsupported_decorators = {
"snowpark": "Step *%s* is marked for execution on Snowpark with Argo Workflows which isn't currently supported.",
"slurm": "Step *%s* is marked for execution on Slurm with Argo Workflows which isn't currently supported.",
"nvidia": "Step *%s* is marked for execution on Nvidia with Argo Workflows which isn't currently supported.",
"nvct": "Step *%s* is marked for execution on Nvct with Argo Workflows which isn't currently supported.",
"skypilot_step": "Step *%s* is marked for execution on Skypilot with Argo Workflows which isn't currently supported.",
}
class IncorrectProductionToken(MetaflowException):
    """`MetaflowException` subclass carrying the headline "Incorrect production token"."""

    headline = "Incorrect production token"
class RunIdMismatch(MetaflowException):
    """`MetaflowException` subclass carrying the headline "Run ID mismatch"."""

    headline = "Run ID mismatch"
class IncorrectMetadataServiceVersion(MetaflowException):
    """`MetaflowException` subclass carrying the headline "Incorrect version for metaflow service"."""

    headline = "Incorrect version for metaflow service"
class ArgoWorkflowsNameTooLong(MetaflowException):
    """`MetaflowException` subclass carrying the headline "Argo Workflows name too long"."""

    headline = "Argo Workflows name too long"
class UnsupportedPythonVersion(MetaflowException):
    """`MetaflowException` subclass carrying the headline "Unsupported version of Python"."""

    headline = "Unsupported version of Python"
# Root click command group for this module; the `argo_workflows` group is
# attached to it via `@cli.group`. Documented with a comment rather than a
# docstring because click would surface a docstring as the group's help text.
@click.group()
def cli():
    pass
@cli.group(help="Commands related to Argo Workflows.")
@click.option(
    "--name",
    default=None,
    type=str,
    help="Argo Workflow name. The flow name is used instead if "
    "this option is not specified.",
)
@click.pass_obj
def argo_workflows(obj, name=None):
    # Group callback: validates the Python version and the flow, then resolves
    # the workflow name under both naming schemes and stashes the results on
    # the shared click object `obj` for the sub-commands to read.
    check_python_version(obj)
    obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)

    # Current (v2) naming scheme.
    v2_resolution = resolve_workflow_name_v2(obj, name)
    (
        obj.workflow_name,
        obj.token_prefix,
        obj.is_project,
        obj._is_workflow_name_modified,
        # exception_on_create is used to prevent deploying new flows with too
        # long names via --name
        obj._exception_on_create,
    ) = v2_resolution

    # Backward compatibility for Metaflow versions <=2.16 because of
    # change in name length restrictions in Argo Workflows from 253 to 52
    # characters.
    v1_resolution = resolve_workflow_name_v1(obj, name)
    (
        obj._v1_workflow_name,
        obj._v1_is_workflow_name_modified,
    ) = v1_resolution
@argo_workflows.command(help="Deploy a new version of this workflow to Argo Workflows.")
@click.option(
    "--authorize",
    default=None,
    help="Authorize using this production token. You need this "
    "when you are re-deploying an existing flow for the first "
    "time. The token is cached in METAFLOW_HOME, so you only "
    "need to specify this once.",
)
@click.option(
    "--generate-new-token",
    is_flag=True,
    help="Generate a new production token for this flow. "
    "This will move the production flow to a new namespace.",
)
@click.option(
    "--new-token",
    "given_token",
    default=None,
    help="Use the given production token for this flow. "
    "This will move the production flow to the given namespace.",
)
@click.option(
    "--tag",
    "tags",
    multiple=True,
    default=None,
    help="Annotate all objects produced by Argo Workflows runs "
    "with the given tag. You can specify this option multiple "
    "times to attach multiple tags.",
)
@click.option(
    "--namespace",
    "user_namespace",
    default=None,
    help="Change the namespace from the default (production token) "
    "to the given tag. See run --help for more information.",
)
@click.option(
    "--only-json",
    is_flag=True,
    default=False,
    help="Only print out JSON sent to Argo Workflows. Do not deploy anything.",
    hidden=True,
)
@click.option(
    "--max-workers",
    default=100,
    show_default=True,
    help="Maximum number of parallel processes.",
)
@click.option(
    "--workflow-timeout", default=None, type=int, help="Workflow timeout in seconds."
)
@click.option(
    "--workflow-priority",
    default=None,
    type=int,
    help="Workflow priority as an integer. Workflows with higher priority "
    "are processed first if Argo Workflows controller is configured to process "
    "limited number of workflows in parallel",
)
@click.option(
    "--auto-emit-argo-events/--no-auto-emit-argo-events",
    default=True,  # TODO: Default to a value from config
    show_default=True,
    help="Auto emits Argo Events when the run completes successfully.",
)
@click.option(
    "--notify-on-error/--no-notify-on-error",
    default=False,
    show_default=True,
    help="Notify if the workflow fails.",
)
@click.option(
    "--notify-on-success/--no-notify-on-success",
    default=False,
    show_default=True,
    help="Notify if the workflow succeeds.",
)
@click.option(
    "--notify-slack-webhook-url",
    default=None,
    help="Slack incoming webhook url for workflow success/failure notifications.",
)
@click.option(
    "--notify-pager-duty-integration-key",
    default=None,
    help="PagerDuty Events API V2 Integration key for workflow success/failure notifications.",
)
@click.option(
    "--notify-incident-io-api-key",
    default=None,
    help="Incident.io API V2 key for workflow success/failure notifications.",
)
@click.option(
    "--incident-io-alert-source-config-id",
    default=None,
    help="Incident.io Alert source config ID. Example '01GW2G3V0S59R238FAHPDS1R66'",
)
@click.option(
    "--incident-io-metadata",
    default=None,
    type=str,
    multiple=True,
    help="Incident.io Alert Custom Metadata field in the form of Key=Value",
)
@click.option(
    "--enable-heartbeat-daemon/--no-enable-heartbeat-daemon",
    default=False,
    show_default=True,
    help="Use a daemon container to broadcast heartbeats.",
)
@click.option(
    "--deployer-attribute-file",
    default=None,
    show_default=True,
    type=str,
    help="Write the workflow name to the file specified. Used internally for Metaflow's Deployer API.",
    hidden=True,
)
@click.option(
    "--enable-error-msg-capture/--no-enable-error-msg-capture",
    default=True,
    show_default=True,
    help="Capture stack trace of first failed task in exit hook.",
)
@click.option(
    "--workflow-title",
    default=None,
    type=str,
    help="Custom title for the workflow displayed in Argo Workflows UI. Defaults to `project_flow_name`. Supports markdown formatting.",
)
@click.option(
    "--workflow-description",
    default=None,
    type=str,
    help="Custom description for the workflow displayed in Argo Workflows UI. Defaults to the flow's docstring if available. Supports markdown formatting and multi-line text.",
)
@click.pass_obj
def create(
    obj,
    tags=None,
    user_namespace=None,
    only_json=False,
    authorize=None,
    generate_new_token=False,
    given_token=None,
    max_workers=None,
    workflow_timeout=None,
    workflow_priority=None,
    auto_emit_argo_events=False,
    notify_on_error=False,
    notify_on_success=False,
    notify_slack_webhook_url=None,
    notify_pager_duty_integration_key=None,
    notify_incident_io_api_key=None,
    incident_io_alert_source_config_id=None,
    incident_io_metadata=None,
    enable_heartbeat_daemon=True,
    workflow_title=None,
    workflow_description=None,
    deployer_attribute_file=None,
    enable_error_msg_capture=False,
):
    # Overall sequence: validate -> (optionally) write deployer attributes ->
    # resolve a production token -> build the workflow -> either print it
    # (--only-json) or deploy it, report on naming changes and schedule it.

    # check if we are supposed to block deploying the flow due to name length constraints.
    if obj._exception_on_create is not None:
        raise obj._exception_on_create

    # TODO: Remove this once we have a proper validator system in place
    for node in obj.graph:
        for decorator, error_message in unsupported_decorators.items():
            if any(d.name == decorator for d in node.decorators):
                raise MetaflowException(error_message % node.name)

    validate_tags(tags)

    if deployer_attribute_file:
        with open(deployer_attribute_file, "w", encoding="utf-8") as f:
            json.dump(
                {
                    "name": obj.workflow_name,
                    "flow_name": obj.flow.name,
                    "metadata": obj.metadata.metadata_str(),
                },
                f,
            )

    obj.echo("Deploying *%s* to Argo Workflows..." % obj.flow.name, bold=True)

    if not only_json:
        if SERVICE_VERSION_CHECK:
            # TODO: Consider dispelling with this check since it's been 2 years since the
            #       needed metadata service changes have been available in open-source. It's
            #       likely that Metaflow users may not have access to metadata service from
            #       within their workstations.
            check_metadata_service_version(obj)
        token = resolve_token(
            obj.workflow_name,
            obj.token_prefix,
            obj,
            authorize,
            given_token,
            generate_new_token,
            obj.is_project,
        )
    else:
        # When only generating JSON, we skip cluster access operations:
        # - Metadata service version check (requires service access)
        # - Token resolution (requires Kubernetes cluster access to check existing deployments)
        # Instead, we use a placeholder token since the JSON is just for inspection.
        token = "__PLACEHOLDER_PRODUCTION_TOKEN__"
        if given_token:
            if obj.is_project:
                # we rely on a known prefix for @project tokens, so we can't
                # allow the user to specify a custom token with an arbitrary prefix
                raise MetaflowException(
                    "--new-token is not supported for @projects. Use --generate-new-token "
                    "to create a new token."
                )
            if given_token.startswith("production:"):
                given_token = given_token[11:]
            token = given_token
            obj.echo("")
            obj.echo("Using the given token, *%s*." % token)
        if generate_new_token:
            token = new_token(obj.token_prefix, None)
            if token is None:
                raise MetaflowException(
                    "--generate-new-token option is not supported after using "
                    "--new-token. Use --new-token to make a new namespace."
                )
            obj.echo("")
            obj.echo("A new production token generated.")

    flow = make_flow(
        obj,
        token,
        obj.workflow_name,
        tags,
        user_namespace,
        max_workers,
        workflow_timeout,
        workflow_priority,
        auto_emit_argo_events,
        notify_on_error,
        notify_on_success,
        notify_slack_webhook_url,
        notify_pager_duty_integration_key,
        notify_incident_io_api_key,
        incident_io_alert_source_config_id,
        incident_io_metadata,
        enable_heartbeat_daemon,
        enable_error_msg_capture,
        workflow_title,
        workflow_description,
    )

    if only_json:
        # Inspection mode: print the generated workflow and stop here.
        obj.echo_always(str(flow), err=False, no_bold=True)
        # TODO: Support echo-ing Argo Events Sensor template
        return

    flow.deploy()
    deployed_message = (
        "Workflow *{workflow_name}* "
        "for flow *{name}* deployed to "
        "Argo Workflows successfully.\n".format(
            workflow_name=obj.workflow_name, name=current.flow_name
        )
    )
    obj.echo(deployed_message, bold=True)

    if obj._is_workflow_name_modified:
        obj.echo(
            "Note that the flow was deployed with a modified name "
            "due to Kubernetes naming conventions on Argo Workflows. The "
            "original flow name is stored in the workflow annotations.\n",
            wrap=True,
        )

        if obj.workflow_name != obj._v1_workflow_name:
            legacy_name = obj._v1_workflow_name
            # Delete the old workflow if it exists
            try:
                ArgoWorkflows.delete(legacy_name)
                obj.echo("Important!", bold=True, nl=False)
                obj.echo(
                    " To comply with new naming restrictions on Argo "
                    "Workflows, this deployment replaced the previously "
                    "deployed workflow {v1_workflow_name}.\n".format(
                        v1_workflow_name=legacy_name
                    ),
                    wrap=True,
                )
            except ArgoWorkflowsException:
                # Best effort: failing to remove the legacy deployment must
                # not fail the deployment that just succeeded.
                # TODO: Catch a more specific exception
                pass

            obj.echo("Warning! ", bold=True, nl=False)
            obj.echo(
                "Due to new naming restrictions on Argo Workflows, "
                "re-deploying this flow with older versions of Metaflow (<{version}) "
                "will result in the flow being deployed with a different name -\n"
                "*{v1_workflow_name}* without replacing the version you just deployed. "
                "This may result in duplicate executions of this flow. To avoid this issue, "
                "always deploy this flow using Metaflow ≥{version} or specify the flow name with --name.".format(
                    v1_workflow_name=legacy_name,
                    version=NEW_ARGO_NAMELENGTH_METAFLOW_VERSION,
                ),
                wrap=True,
            )

    if ARGO_WORKFLOWS_UI_URL:
        obj.echo("See the deployed workflow here:", bold=True)
        argo_workflowtemplate_link = "%s/workflow-templates/%s" % (
            ARGO_WORKFLOWS_UI_URL.rstrip("/"),
            KUBERNETES_NAMESPACE,
        )
        obj.echo(
            "%s/%s\n\n" % (argo_workflowtemplate_link, obj.workflow_name),
            indent=True,
        )

    flow.schedule()
    obj.echo("What will trigger execution of the workflow:", bold=True)
    obj.echo(flow.trigger_explanation(), indent=True)

    # TODO: Print events emitted by execution of this flow

    # response = ArgoWorkflows.trigger(obj.workflow_name)
    # run_id = "argo-" + response["metadata"]["name"]

    # obj.echo(
    #     "Workflow *{name}* triggered on Argo Workflows "
    #     "(run-id *{run_id}*).".format(name=obj.workflow_name, run_id=run_id),
    #     bold=True,
    # )
def check_python_version(obj):
    """Abort with UnsupportedPythonVersion when running on Python < 3.6.

    On a supported interpreter this is a no-op and `obj` is not touched.
    Otherwise an explanation is printed through `obj.echo` before raising.
    """
    # argo-workflows integration for Metaflow isn't supported for Py versions below 3.6.
    # This constraint can very well be lifted if desired.
    if sys.version_info >= (3, 6):
        return

    obj.echo("")
    obj.echo(
        "Metaflow doesn't support Argo Workflows for Python %s right now."
        % platform.python_version()
    )
    obj.echo(
        "Please upgrade your Python interpreter to version 3.6 (or higher) or "
        "reach out to us at slack.outerbounds.co for more help."
    )
    raise UnsupportedPythonVersion(
        "Try again with a more recent version of Python (>=3.6)."
    )
def check_metadata_service_version(obj):
    """Ensure the metadata service is recent enough for Argo Workflows.

    Returns silently when `obj.metadata.version()` is "local" or parses to at
    least 2.0.2. In every other case (including a `None` version) upgrade
    instructions are printed and IncorrectMetadataServiceVersion is raised.
    """
    version = obj.metadata.version()

    if version == "local":
        return

    # Metaflow metadata service needs to be at least at version 2.0.2
    # since prior versions did not support strings as object ids.
    if version is not None and version_parse(version) >= version_parse("2.0.2"):
        return

    obj.echo("")
    obj.echo(
        "You are running a version of the metaflow service that currently doesn't "
        "support Argo Workflows. "
    )
    obj.echo(
        "For more information on how to upgrade your service to a compatible "
        "version (>= 2.0.2), visit:"
    )
    obj.echo(
        " https://docs.outerbounds.com/engineering/operations/migration/",
        fg="green",
    )
    obj.echo(
        "Once you have upgraded your metadata service, please re-execute your "
        "command."
    )
    raise IncorrectMetadataServiceVersion(
        "Try again with a more recent version of metaflow service (>=2.0.2)."
    )
# Argo Workflows has a few restrictions on workflow names:
# - Argo Workflow Template names can't be longer than 253 characters since
# they follow DNS Subdomain name restrictions.
# - Argo Workflows stores workflow template names as a label in the workflow
# template metadata - workflows.argoproj.io/workflow-template, which follows
# RFC 1123, which is a strict subset of DNS Subdomain names and allows for
# 63 characters.
# - Argo Workflows appends a unix timestamp to the workflow name when the workflow
# is created (-1243856725) from a workflow template deployed as a cron workflow template
# reducing the number of characters available to 52.
# - TODO: Check naming restrictions for Argo Events.
# In summary -
# - We truncate the workflow name to 45 characters to leave enough room for future
# enhancements to the Argo Workflows integration.
# - We remove any underscores since Argo Workflows doesn't allow them.
# - We convert the name to lower case.
# - We remove + and @ as not allowed characters, which can be part of the
# project branch due to using email addresses as user names.
# - We append a hash of the workflow name to the end to make it unique.
# A complication here is that in previous versions of Metaflow (=<2.16), the limit was a
# rather lax 253 characters - so we have two issues to contend with:
# 1. Replacing any equivalent flows deployed using previous versions of Metaflow which
# adds a bit of complexity to the business logic.
# 2. Breaking Metaflow users who have multiple versions of Metaflow floating in their
# organization. Imagine a scenario, where metaflow-v1 (253 chars) deploys the same
# flow which was previously deployed using the new metaflow-v2 (45 chars) - the user
# will end up with two workflow templates instead of one since metaflow-v1 has no
# awareness of the new name truncation logic introduced by metaflow-v2. Unfortunately,
# there is no way to avoid this scenario - so we will do our best to message to the
# user to not use an older version of Metaflow to redeploy affected flows.
# ------------------------------------------------------------------------------------------
# | metaflow-v1 (253 chars) | metaflow-v2 (45 chars) | Result |
# ------------------------------------------------------------------------------------------
# | workflow_name_modified = True | workflow_name_modified = False | Not possible |
# ------------------------------------------------------------------------------------------
# | workflow_name_modified = False | workflow_name_modified = True | Messaging needed |
# ------------------------------------------------------------------------------------------
# | workflow_name_modified = False | workflow_name_modified = False | No message needed |
# ------------------------------------------------------------------------------------------
# | workflow_name_modified = True | workflow_name_modified = True | Messaging needed |
# ------------------------------------------------------------------------------------------
def resolve_workflow_name_v1(obj, name):
    """Reproduce the workflow name that Metaflow versions =<2.16 would compute.

    Returns a `(workflow_name, is_workflow_name_modified)` tuple, or
    `(None, False)` for combinations that those versions could not deploy.

    important!! - this logic should stay static, including any bugs, since it
    is only used to locate workflows deployed under historical names.
    """

    def _legacy_sanitize(raw_name):
        # Note - since the original name sanitization was a surjective
        #        mapping, using it here is a bug, but we leave this in
        #        place since the usage of v1_workflow_name is to generate
        #        historical workflow names, so we need to replicate all
        #        the bugs too :'(
        without_leading_junk = re.compile(r"^[^A-Za-z0-9]+").sub("", raw_name)
        return (
            without_leading_junk.replace("_", "")
            .replace("@", "")
            .replace("+", "")
            .lower()
        )

    project = current.get("project_name")
    modified = False

    if project:
        if name:
            return None, False  # not possible in versions =<2.16
        workflow_name = current.project_flow_name
        if len(workflow_name) > 253:
            digest = sha1(to_bytes(workflow_name)).digest()
            name_hash = to_unicode(base64.b32encode(digest))[:8].lower()
            workflow_name = "%s-%s" % (workflow_name[:242], name_hash)
            modified = True
        if not VALID_NAME.search(workflow_name):
            workflow_name = _legacy_sanitize(workflow_name)
            modified = True
        return workflow_name, modified

    if name and not VALID_NAME.search(name):
        return None, False  # not possible in versions =<2.16
    workflow_name = name if name else current.flow_name
    if len(workflow_name) > 253:
        return None, False  # not possible in versions =<2.16
    if not VALID_NAME.search(workflow_name):
        workflow_name = _legacy_sanitize(workflow_name)
        modified = True
    return workflow_name, modified
def resolve_workflow_name_v2(obj, name):
    """Compute the Argo workflow name under the current (45 character) scheme.

    Parameters
    ----------
    obj : CLI state object (not read by this function).
    name : value of `--name`, or None when the option was not given.

    Returns
    -------
    tuple
        `(workflow_name, token_prefix, is_project, is_workflow_name_modified,
        exception_on_create)`. `token_prefix` is lower-cased.
        `exception_on_create` is either None or an ArgoWorkflowsNameTooLong
        instance that the caller is expected to raise when deploying.

    Raises
    ------
    MetaflowException
        If `--name` is combined with a @project flow, or if `--name` does not
        match VALID_NAME.
    """
    # current logic for imputing workflow_name
    limit = 45
    project = current.get("project_name")
    is_workflow_name_modified = False
    exception_on_create = None

    def _keep_head_and_tail(text, budget):
        # Fit `text` into `budget` characters by keeping its beginning and
        # its end and dropping the middle.
        if len(text) <= budget:
            return text
        head = budget // 2
        tail = budget - head
        return text[:head] + text[-tail:]

    if project:
        if name:
            raise MetaflowException(
                "--name is not supported for @projects. Use --branch instead."
            )
        workflow_name = current.project_flow_name
        project_branch = to_bytes(".".join((project, current.branch_name)))
        token_prefix = (
            "mfprj-%s"
            % to_unicode(base64.b32encode(sha1(project_branch).digest()))[:16]
        )
        is_project = True

        if len(workflow_name) > limit:
            name_hash = to_unicode(
                base64.b32encode(sha1(to_bytes(workflow_name)).digest())
            )[:5].lower()

            # Generate a meaningful short name
            branch_name = current.branch_name
            flow_name = current.flow_name
            parts = [project, branch_name, flow_name]

            # 6 characters are held back for the "-<5 char hash>" suffix
            # appended below.
            max_name_len = limit - 6
            min_each = 7
            total_len = sum(len(p) for p in parts)
            remaining = max_name_len - 3 * min_each

            # Every part gets `min_each` characters; the rest is shared out
            # proportionally to each part's length, and rounding leftovers go
            # to whichever part currently has the smallest extra.
            extras = [int(remaining * len(p) / total_len) for p in parts]
            while sum(extras) < remaining:
                extras[extras.index(min(extras))] += 1
            proj_budget, branch_budget, flow_budget = [min_each + e for e in extras]

            proj_str = _keep_head_and_tail(project, proj_budget)
            # The branch keeps only its beginning.
            branch_str = branch_name[:branch_budget]
            flow_str = _keep_head_and_tail(flow_name, flow_budget)

            descriptive_name = sanitize_for_argo(
                "%s.%s.%s" % (proj_str, branch_str, flow_str)
            )
            workflow_name = "%s-%s" % (descriptive_name, name_hash)
            is_workflow_name_modified = True
    else:
        if name and not VALID_NAME.search(name):
            raise MetaflowException(
                "Name '%s' contains invalid characters. The "
                "name must consist of lower case alphanumeric characters, '-' or '.'"
                ", and must start with an alphabetic character, "
                "and end with an alphanumeric character." % name
            )
        workflow_name = name if name else current.flow_name
        token_prefix = workflow_name
        is_project = False

        if len(workflow_name) > limit:
            # NOTE: We could have opted for truncating names specified by --name and flow_name
            #       as well, but chose to error instead due to the expectation that users would
            #       be intentionally explicit in their naming, and truncating these would lose
            #       information they intended to encode in the deployment.
            #
            # Report `workflow_name`, not `name`: `name` is None whenever the
            # flow name is used because --name was not given.
            exception_on_create = ArgoWorkflowsNameTooLong(
                "The full name of the workflow:\n*%s*\nis longer than %s "
                "characters.\n\n"
                "To deploy this workflow to Argo Workflows, please "
                "assign a shorter name\nusing the option\n"
                "*argo-workflows --name create*." % (workflow_name, limit)
            )

    if not VALID_NAME.search(workflow_name):
        # NOTE: Even though sanitize_for_argo is surjective which can result in collisions,
        #       we still use it here since production tokens guard against name collisions
        #       and if we made it injective, metaflow 2.17 will result in every deployed
        #       flow's name changing, significantly increasing the blast radius of the change.
        workflow_name = sanitize_for_argo(workflow_name)
        is_workflow_name_modified = True

    return (
        workflow_name,
        token_prefix.lower(),
        is_project,
        is_workflow_name_modified,
        exception_on_create,
    )
def make_flow(
    obj,
    token,
    name,
    tags,
    namespace,
    max_workers,
    workflow_timeout,
    workflow_priority,
    auto_emit_argo_events,
    notify_on_error,
    notify_on_success,
    notify_slack_webhook_url,
    notify_pager_duty_integration_key,
    notify_incident_io_api_key,
    incident_io_alert_source_config_id,
    incident_io_metadata,
    enable_heartbeat_daemon,
    enable_error_msg_capture,
    workflow_title,
    workflow_description,
):
    """Validate the deployment options, package the code and build the workflow.

    Checks the datastore type and the notification settings, attaches the
    @kubernetes and @environment decorators to the flow, obtains the code
    package URL and SHA, and returns an `ArgoWorkflows` instance constructed
    from all of the above.

    Raises MetaflowException when the datastore is not one of azure/gs/s3 or
    when the notification options are incomplete.
    """
    # TODO: Make this check less specific to Amazon S3 as we introduce
    #       support for more cloud object stores.
    if obj.flow_datastore.TYPE not in ("azure", "gs", "s3"):
        raise MetaflowException(
            "Argo Workflows requires --datastore=s3 or --datastore=azure or --datastore=gs"
        )

    wants_notifications = notify_on_error or notify_on_success
    has_notification_channel = (
        notify_slack_webhook_url
        or notify_pager_duty_integration_key
        or notify_incident_io_api_key
    )

    if wants_notifications and not has_notification_channel:
        raise MetaflowException(
            "Notifications require specifying an incoming Slack webhook url via --notify-slack-webhook-url, PagerDuty events v2 integration key via --notify-pager-duty-integration-key or\n"
            "Incident.io integration API key via --notify-incident-io-api-key.\n"
            " If you would like to set up notifications for your Slack workspace, follow the instructions at "
            "https://api.slack.com/messaging/webhooks to generate a webhook url.\n"
            " For notifications through PagerDuty, generate an integration key by following the instructions at "
            "https://support.pagerduty.com/docs/services-and-integrations#create-a-generic-events-api-integration\n"
            " For notifications through Incident.io, generate an alert source config."
        )

    if (
        wants_notifications
        and notify_incident_io_api_key
        and incident_io_alert_source_config_id is None
    ):
        raise MetaflowException(
            "Incident.io alerts require an alert source configuration ID. Please set one with --incident-io-alert-source-config-id"
        )

    # Attach @kubernetes and @environment decorator to the flow to
    # ensure that the related decorator hooks are invoked.
    decorators._attach_decorators(
        obj.flow, [KubernetesDecorator.name, EnvironmentDecorator.name]
    )
    decorators._process_late_attached_decorator(
        [KubernetesDecorator.name, EnvironmentDecorator.name],
        obj.flow,
        obj.graph,
        obj.environment,
        obj.flow_datastore,
        obj.logger,
    )
    # Pick up the graph from the flow again now that decorators were attached.
    obj.graph = obj.flow._graph

    # Save the code package in the flow datastore so that both user code and
    # metaflow package can be retrieved during workflow execution.
    obj.package = MetaflowPackage(
        obj.flow,
        obj.environment,
        obj.echo,
        suffixes=obj.package_suffixes,
        flow_datastore=obj.flow_datastore if FEAT_ALWAYS_UPLOAD_CODE_PACKAGE else None,
    )

    if not FEAT_ALWAYS_UPLOAD_CODE_PACKAGE:
        saved = obj.flow_datastore.save_data([obj.package.blob], len_hint=1)
        package_url, package_sha = saved[0]
    else:
        # This blocks until the package is created
        package_url = obj.package.package_url()
        package_sha = obj.package.package_sha()

    return ArgoWorkflows(
        name,
        obj.graph,
        obj.flow,
        obj.package.package_metadata,
        package_sha,
        package_url,
        token,
        obj.metadata,
        obj.flow_datastore,
        obj.environment,
        obj.event_logger,
        obj.monitor,
        tags=tags,
        namespace=namespace,
        max_workers=max_workers,
        username=get_username(),
        workflow_timeout=workflow_timeout,
        workflow_priority=workflow_priority,
        auto_emit_argo_events=auto_emit_argo_events,
        notify_on_error=notify_on_error,
        notify_on_success=notify_on_success,
        notify_slack_webhook_url=notify_slack_webhook_url,
        notify_pager_duty_integration_key=notify_pager_duty_integration_key,
        notify_incident_io_api_key=notify_incident_io_api_key,
        incident_io_alert_source_config_id=incident_io_alert_source_config_id,
        incident_io_metadata=incident_io_metadata,
        enable_heartbeat_daemon=enable_heartbeat_daemon,
        enable_error_msg_capture=enable_error_msg_capture,
        workflow_title=workflow_title,
        workflow_description=workflow_description,
    )
# TODO: Unify this method with the one in step_functions_cli.py
def resolve_token(
    name, token_prefix, obj, authorize, given_token, generate_new_token, is_project
):
    """Authorize a deployment and decide which production token it uses.

    Looks up the existing deployment of `name`, verifies that the caller is
    either the previous deployer or holds the previous token, then picks the
    token: the one given via --new-token, a freshly generated one, or the
    previous one. The chosen token is stored under `token_prefix` and returned.

    Raises IncorrectProductionToken when authorization fails,
    MetaflowException for unsupported option combinations and
    MetaflowInternalError when a first token cannot be generated.
    """
    # 1) retrieve the previous deployment, if one exists
    workflow = ArgoWorkflows.get_existing_deployment(name)
    if workflow is not None:
        prev_user, prev_token = workflow
    else:
        obj.echo(
            "It seems this is the first time you are deploying *%s* to "
            "Argo Workflows." % name
        )
        prev_token = None

    # 2) authorize this deployment
    if prev_token is not None:
        if authorize is None:
            authorize = load_token(token_prefix)
        elif authorize.startswith("production:"):
            authorize = authorize[11:]

        # we allow the user who deployed the previous version to re-deploy,
        # even if they don't have the token
        is_previous_deployer = prev_user == get_username()
        if not is_previous_deployer and authorize != prev_token:
            obj.echo(
                "There is an existing version of *%s* on Argo Workflows which was "
                "deployed by the user *%s*." % (name, prev_user)
            )
            obj.echo(
                "To deploy a new version of this flow, you need to use the same "
                "production token that they used. "
            )
            obj.echo(
                "Please reach out to them to get the token. Once you have it, call "
                "this command:"
            )
            obj.echo(" argo-workflows create --authorize MY_TOKEN", fg="green")
            obj.echo(
                'See "Organizing Results" at docs.metaflow.org for more information '
                "about production tokens."
            )
            raise IncorrectProductionToken(
                "Try again with the correct production token."
            )

    # 3) do we need a new token or should we use the existing token?
    if given_token:
        if is_project:
            # we rely on a known prefix for @project tokens, so we can't
            # allow the user to specify a custom token with an arbitrary prefix
            raise MetaflowException(
                "--new-token is not supported for @projects. Use --generate-new-token "
                "to create a new token."
            )
        if given_token.startswith("production:"):
            given_token = given_token[11:]
        token = given_token
        obj.echo("")
        obj.echo("Using the given token, *%s*." % token)
    elif prev_token is None or generate_new_token:
        token = new_token(token_prefix, prev_token)
        if token is None:
            if prev_token is None:
                raise MetaflowInternalError(
                    "We could not generate a new token. This is unexpected. "
                )
            raise MetaflowException(
                "--generate-new-token option is not supported after using "
                "--new-token. Use --new-token to make a new namespace."
            )
        obj.echo("")
        obj.echo("A new production token generated.")
    else:
        token = prev_token

    # Tell the user which namespace the flow lives in and how to share access.
    obj.echo("")
    obj.echo("The namespace of this production flow is")
    obj.echo(" production:%s" % token, fg="green")
    obj.echo(
        "To analyze results of this production flow add this line in your notebooks:"
    )
    obj.echo(' namespace("production:%s")' % token, fg="green")
    obj.echo(
        "If you want to authorize other people to deploy new versions of this flow to "
        "Argo Workflows, they need to call"
    )
    obj.echo(" argo-workflows create --authorize %s" % token, fg="green")
    obj.echo("when deploying this flow to Argo Workflows for the first time.")
    obj.echo(
        'See "Organizing Results" at https://docs.metaflow.org/ for more '
        "information about production tokens."
    )
    obj.echo("")

    store_token(token_prefix, token)
    return token
@parameters.add_custom_parameters(deploy_mode=False)
@argo_workflows.command(help="Trigger the workflow on Argo Workflows.")
@click.option(
    "--run-id-file",
    default=None,
    show_default=True,
    type=str,
    help="Write the ID of this run to the file specified.",
)
@click.option(
    "--deployer-attribute-file",
    default=None,
    show_default=True,
    type=str,
    help="Write the metadata and pathspec of this run to the file specified.\nUsed internally for Metaflow's Deployer API.",
    hidden=True,
)
@click.pass_obj
def trigger(obj, run_id_file=None, deployer_attribute_file=None, **kwargs):
    # Collects the flow parameters supplied on the command line, triggers the
    # deployed workflow (falling back to a legacy-named deployment when one
    # exists) and reports / records the resulting run id.

    def _kwarg_key(param):
        # Swap `-` with `_` in parameter name to match click's behavior
        return param.name.replace("-", "_").lower()

    def _convert_value(param):
        val = kwargs.get(_kwarg_key(param))
        if param.kwargs.get("type") == JSONType:
            val = json.dumps(val)
        elif isinstance(val, parameters.DelayedEvaluationParameter):
            val = val(return_str=True)
        return val

    # Only parameters that were actually given a value are forwarded.
    params = {}
    for _, param in obj.flow._get_parameters():
        if kwargs.get(_kwarg_key(param)) is None:
            continue
        params[param.name] = _convert_value(param)

    workflow_name_to_deploy = obj.workflow_name
    # For users that upgraded the client but did not redeploy their flow,
    # we fallback to old workflow names in case of a conflict.
    names_differ = obj.workflow_name != obj._v1_workflow_name
    # use the old name only if there exists a deployment.
    if names_differ and ArgoWorkflows.get_existing_deployment(obj._v1_workflow_name):
        obj.echo("Warning! ", bold=True, nl=False)
        obj.echo(
            "Found a deployment of this flow with an old style name, defaulted to triggering *%s*."
            % obj._v1_workflow_name,
            wrap=True,
        )
        obj.echo(
            "Due to new naming restrictions on Argo Workflows, "
            "this flow will have a shorter name with newer versions of Metaflow (>=%s) "
            "which will allow it to be triggered through Argo UI as well. "
            % NEW_ARGO_NAMELENGTH_METAFLOW_VERSION,
            wrap=True,
        )
        obj.echo("re-deploy your flow in order to get rid of this message.")
        workflow_name_to_deploy = obj._v1_workflow_name

    response = ArgoWorkflows.trigger(workflow_name_to_deploy, params)
    run_id = "argo-" + response["metadata"]["name"]

    if run_id_file:
        with open(run_id_file, "w") as f:
            f.write(str(run_id))

    if deployer_attribute_file:
        with open(deployer_attribute_file, "w") as f:
            json.dump(
                {
                    "name": workflow_name_to_deploy,
                    "metadata": obj.metadata.metadata_str(),
                    "pathspec": "/".join((obj.flow.name, run_id)),
                },
                f,
            )

    triggered_message = (
        "Workflow *{name}* triggered on Argo Workflows "
        "(run-id *{run_id}*).".format(name=workflow_name_to_deploy, run_id=run_id)
    )
    obj.echo(triggered_message, bold=True)

    if UI_URL:
        run_url = "%s/%s/%s" % (UI_URL.rstrip("/"), obj.flow.name, run_id)
        obj.echo(
            "See the run in the UI at %s" % run_url,
            bold=True,
        )
@argo_workflows.command(help="Delete the flow on Argo Workflows.")
@click.option(
    "--authorize",
    default=None,
    type=str,
    help="Authorize the deletion with a production token",
)
@click.pass_obj
def delete(obj, authorize=None):
    # Deletes the deployment under the current name and, when one exists,
    # the deployment under the legacy (v1) name as well.

    def _token_instructions(flow_name, prev_user):
        obj.echo(
            "There is an existing version of *%s* on Argo Workflows which was "
            "deployed by the user *%s*." % (flow_name, prev_user)
        )
        obj.echo(
            "To delete this flow, you need to use the same production token that they used."
        )
        obj.echo(
            "Please reach out to them to get the token. Once you have it, call "
            "this command:"
        )
        obj.echo(" argo-workflows delete --authorize MY_TOKEN", fg="green")
        obj.echo(
            'See "Organizing Results" at docs.metaflow.org for more information '
            "about production tokens."
        )

    # Cases and expected behaviours:
    # old name exists, new name does not exist -> delete old and do not fail on missing new
    # old name exists, new name exists -> delete both
    # old name does not exist, new name exists -> only try to delete new
    # old name does not exist, new name does not exist -> keep previous behaviour where missing deployment raises error for the new name.

    def _delete(workflow_name):
        validate_token(workflow_name, obj.token_prefix, authorize, _token_instructions)
        obj.echo("Deleting workflow *{name}*...".format(name=workflow_name), bold=True)

        deletion_result = ArgoWorkflows.delete(workflow_name)
        schedule_deleted, sensor_deleted, workflow_deleted = deletion_result

        if schedule_deleted:
            obj.echo(
                "Deleting cronworkflow *{name}*...".format(name=workflow_name),
                bold=True,
            )
        if sensor_deleted:
            obj.echo(
                "Deleting sensor *{name}*...".format(name=workflow_name),
                bold=True,
            )
        return workflow_deleted

    workflows_deleted = False
    cleanup_old_name = False
    legacy_name = obj._v1_workflow_name

    # Only add the old name if there exists a deployment with such name.
    # This is due to the way validate_token is tied to an existing deployment.
    if (
        obj.workflow_name != legacy_name
        and ArgoWorkflows.get_existing_deployment(legacy_name) is not None
    ):
        cleanup_old_name = True
        obj.echo(
            "This flow has been deployed with another name in the past due to a limitation with Argo Workflows. "
            "Will also delete the older deployment.",
            wrap=True,
        )
        _delete(legacy_name)
        workflows_deleted = True

    # Always try to delete the current name.
    # Do not raise exception if we deleted old name before this.
    try:
        _delete(obj.workflow_name)
        workflows_deleted = True
    except ArgoWorkflowsException:
        if not cleanup_old_name:
            raise

    if not workflows_deleted:
        return

    obj.echo(
        "Deleting Kubernetes resources may take a while. "
        "Deploying the flow again to Argo Workflows while the delete is in-flight will fail."
    )
    obj.echo(
        "In-flight executions will not be affected. "
        "If necessary, terminate them manually."
    )
@argo_workflows.command(help="Suspend flow execution on Argo Workflows.")
@click.option(
    "--authorize",
    default=None,
    type=str,
    help="Authorize the suspension with a production token",
)
@click.argument("run-id", required=True, type=str)
@click.pass_obj
def suspend(obj, run_id, authorize=None):
    # Validates the run id against each existing workflow name in turn and
    # suspends the run, stopping at the first name for which it succeeds.

    def _token_instructions(flow_name, prev_user):
        obj.echo(
            "There is an existing version of *%s* on Argo Workflows which was "
            "deployed by the user *%s*." % (flow_name, prev_user)
        )
        obj.echo(
            "To suspend this flow, you need to use the same production token that they used."
        )
        obj.echo(
            "Please reach out to them to get the token. Once you have it, call "
            "this command:"
        )
        obj.echo(" argo-workflows suspend RUN_ID --authorize MY_TOKEN", fg="green")
        obj.echo(
            'See "Organizing Results" at docs.metaflow.org for more information '
            "about production tokens."
        )

    for candidate_name in _get_existing_workflow_names(obj):
        validate_run_id(
            candidate_name, obj.token_prefix, authorize, run_id, _token_instructions
        )

        # Trim prefix from run_id
        argo_name = run_id[5:]

        if ArgoWorkflows.suspend(argo_name):
            obj.echo("Suspended execution of *%s*" % run_id)
            break  # no need to try out all workflow_names if we found the running one.
@argo_workflows.command(help="Unsuspend flow execution on Argo Workflows.")
@click.option(
    "--authorize",
    default=None,
    type=str,
    help="Authorize the unsuspend with a production token",
)
@click.argument("run-id", required=True, type=str)
@click.pass_obj
def unsuspend(obj, run_id, authorize=None):
    # Validates the run id against each existing workflow name in turn and
    # unsuspends the run, stopping at the first name for which it succeeds.

    def _token_instructions(flow_name, prev_user):
        obj.echo(
            "There is an existing version of *%s* on Argo Workflows which was "
            "deployed by the user *%s*." % (flow_name, prev_user)
        )
        obj.echo(
            "To unsuspend this flow, you need to use the same production token that they used."
        )
        obj.echo(
            "Please reach out to them to get the token. Once you have it, call "
            "this command:"
        )
        obj.echo(
            " argo-workflows unsuspend RUN_ID --authorize MY_TOKEN",
            fg="green",
        )
        obj.echo(
            'See "Organizing Results" at docs.metaflow.org for more information '
            "about production tokens."
        )

    for candidate_name in _get_existing_workflow_names(obj):
        validate_run_id(
            candidate_name, obj.token_prefix, authorize, run_id, _token_instructions
        )

        # Trim prefix from run_id
        argo_name = run_id[5:]

        if ArgoWorkflows.unsuspend(argo_name):
            obj.echo("Unsuspended execution of *%s*" % run_id)
            break  # no need to try all workflow_names if we found one.
def validate_token(name, token_prefix, authorize, instructions_fn=None):
    """
    Check the supplied production token against the existing deployment.

    Parameters
    ----------
    name : str
        Name of the deployment to look up on Argo Workflows.
    token_prefix : str
        Key used to load and store the locally cached token.
    authorize : str, optional
        Token supplied by the user. A leading "production:" is stripped. When
        None, the cached token for `token_prefix` is loaded instead.
    instructions_fn : callable, optional
        Called as instructions_fn(flow_name=..., prev_user=...) right before
        the error is raised.

    Returns
    -------
    bool
        Always True when validation passes.

    Raises
    ------
    IncorrectProductionToken
        If the caller is not the previous deployer and the token differs.
    """
    # TODO: Unify this with the existing resolve_token implementation.

    # 1) look up the previous deployment, if there is one
    deployment = ArgoWorkflows.get_existing_deployment(name)
    prev_token = None
    if deployment is not None:
        prev_user, prev_token = deployment

    # 2) authorize against the previous deployment's token
    if prev_token is not None:
        if authorize is None:
            authorize = load_token(token_prefix)
        elif authorize.startswith("production:"):
            authorize = authorize[11:]

        # The user who deployed the previous version may proceed even
        # without the token.
        # NOTE: The username is visible in multiple sources, and can be set
        # by the user. Should we consider being stricter here?
        same_user = prev_user == get_username()
        if not same_user and authorize != prev_token:
            if instructions_fn:
                instructions_fn(flow_name=name, prev_user=prev_user)
            raise IncorrectProductionToken(
                "Try again with the correct production token."
            )

    # 3) validation passed: cache the previous token for future use
    store_token(token_prefix, prev_token)
    return True
def get_run_object(pathspec: str):
    """
    Look up the `Run` for `pathspec` with the namespace check disabled.

    Returns None instead of raising when the lookup fails with
    MetaflowNotFound.
    """
    try:
        found = Run(pathspec, _namespace_check=False)
    except MetaflowNotFound:
        found = None
    return found
def get_status_considering_run_object(status, run_obj):
    """
    Remap an Argo status and downgrade "Running" to "Pending" when no run
    object exists.

    `status` is first passed through `remap_status`. If the result is
    "Running" but `run_obj` is None, "Pending" is returned instead.
    """
    effective_status = remap_status(status)
    run_is_missing = run_obj is None
    if run_is_missing and effective_status == "Running":
        return "Pending"
    return effective_status
@argo_workflows.command(help="Fetch flow execution status on Argo Workflows.")
@click.argument("run-id", required=True, type=str)
@click.pass_obj
def status(obj, run_id):
    """
    Print the status of the Argo Workflows execution `run_id`.

    Nothing is printed beyond the progress line when Argo Workflows reports
    no status for the run.

    Raises
    ------
    RunIdMismatch
        If `run_id` does not start with "argo-".
    """
    if not run_id.startswith("argo-"):
        raise RunIdMismatch(
            "Run IDs for flows executed through Argo Workflows begin with 'argo-'"
        )

    flow_name = obj.flow.name
    progress_line = "Fetching status for run *{run_id}* for {flow_name} ...".format(
        run_id=run_id, flow_name=flow_name
    )
    obj.echo(progress_line, bold=True)

    # Argo knows the execution by the run id without its "argo-" prefix.
    raw_status = ArgoWorkflows.get_workflow_status(flow_name, run_id[5:])
    run_obj = get_run_object("/".join((flow_name, run_id)))
    if raw_status is None:
        return
    obj.echo_always(get_status_considering_run_object(raw_status, run_obj))
@argo_workflows.command(help="Terminate flow execution on Argo Workflows.")
@click.option(
    "--authorize",
    default=None,
    type=str,
    help="Authorize the termination with a production token",
)
@click.argument("run-id", required=True, type=str)
@click.pass_obj
def terminate(obj, run_id, authorize=None):
    """
    Terminate the Argo Workflows execution identified by `run_id`.

    The run is validated with `validate_run_id` once per known workflow name
    of this flow, and a termination is attempted after each validation. The
    loop stops at the first attempt that reports success.
    """

    def _token_instructions(flow_name, prev_user):
        # Printed by validate_run_id when the production token does not match.
        existing_deployment = (
            "There is an existing version of *%s* on Argo Workflows which was "
            "deployed by the user *%s*." % (flow_name, prev_user)
        )
        obj.echo(existing_deployment)
        obj.echo(
            "To terminate this flow, you need to use the same production token that they used."
        )
        obj.echo(
            "Please reach out to them to get the token. Once you have it, call "
            "this command:"
        )
        obj.echo(" argo-workflows terminate --authorize MY_TOKEN RUN_ID", fg="green")
        obj.echo(
            'See "Organizing Results" at docs.metaflow.org for more information '
            "about production tokens."
        )

    # Drop the 5-character "argo-" prefix; validate_run_id rejects run ids
    # that do not start with it before this value is ever used.
    argo_name = run_id[5:]

    for candidate_name in _get_existing_workflow_names(obj):
        validate_run_id(
            candidate_name, obj.token_prefix, authorize, run_id, _token_instructions
        )
        announcement = "Terminating run *{run_id}* for {flow_name} ...".format(
            run_id=run_id, flow_name=obj.flow.name
        )
        obj.echo(announcement, bold=True)

        if ArgoWorkflows.terminate(obj.flow.name, argo_name):
            obj.echo("\nRun terminated.")
            # The running workflow was found; the remaining names are irrelevant.
            break
@argo_workflows.command(help="List Argo Workflow templates for the flow.")
@click.option(
    "--all",
    default=False,
    is_flag=True,
    type=bool,
    help="list all Argo Workflow Templates (not just limited to this flow)",
)
@click.pass_obj
def list_workflow_templates(obj, all=None):
    """
    Print one workflow template name per line.

    The flow name and the `--all` flag are forwarded unchanged to
    `ArgoWorkflows.list_templates`.
    """
    # `all` shadows the builtin because click derives the name from `--all`.
    template_names = ArgoWorkflows.list_templates(obj.flow.name, all)
    for each_name in template_names:
        obj.echo_always(each_name)
# Hidden CLI command that runs the heartbeat daemon inside an Argo Workflows
# Daemon container.
@argo_workflows.command(hidden=True, help="start heartbeat process for a run")
@click.option("--run_id", required=True)
@click.option(
    "--tag",
    "tags",
    multiple=True,
    default=None,
    help="Annotate all objects produced by Argo Workflows runs "
    "with the given tag. You can specify this option multiple "
    "times to attach multiple tags.",
)
@click.pass_obj
def heartbeat(obj, run_id, tags=None):
    """
    Register the run, start its heartbeat and then idle forever.

    This function never returns; it sleeps in a loop so the process stays
    alive.
    """
    metadata_provider = obj.metadata
    # The start task may not have registered the run yet, so try it here too.
    metadata_provider.register_run_id(run_id, tags)
    metadata_provider.start_run_heartbeat(obj.flow.name, run_id)

    # Keepalive loop. Deliberately silent: daemon logs can be extremely
    # long-running and should not be polluted.
    while True:
        sleep(10)
def validate_run_id(
    workflow_name, token_prefix, authorize, run_id, instructions_fn=None
):
    """
    Check that `run_id` is an Argo Workflows run of the current flow and that
    the caller is authorized to operate on it.

    Parameters
    ----------
    workflow_name : str
        Accepted for the callers' convenience; not read by this function.
    token_prefix : str
        Key used to load the locally cached production token.
    authorize : str, optional
        Token supplied by the user. A leading "production:" is stripped. When
        None, the cached token for `token_prefix` is loaded instead.
    run_id : str
        Run id, expected to start with "argo-".
    instructions_fn : callable, optional
        Called as instructions_fn(flow_name=..., prev_user=...) right before
        IncorrectProductionToken is raised.

    Returns
    -------
    bool
        Always True when every check passes.

    Raises
    ------
    RunIdMismatch
        If the prefix, flow, project or branch does not match.
    MetaflowException
        If no execution is found for the run id.
    IncorrectProductionToken
        If the caller is not the owner and the token differs.
    """
    # Only Argo Workflows executions can be targeted.
    if not run_id.startswith("argo-"):
        raise RunIdMismatch(
            "Run IDs for flows executed through Argo Workflows begin with 'argo-'"
        )

    # Look up the execution by the run id without its "argo-" prefix.
    name = run_id[5:]
    execution = ArgoWorkflows.get_execution(name)
    if execution is None:
        raise MetaflowException("Could not find workflow *%s* on Argo Workflows" % name)

    owner, token, flow_name, branch_name, project_name = execution

    # The flow file in use must be the flow that produced the run. Without
    # this check, --name could be used to run commands for arbitrary run_ids,
    # disregarding the flow in the file.
    if current.flow_name != flow_name:
        raise RunIdMismatch(
            "The workflow with the run_id *%s* belongs to the flow *%s*, not for the flow *%s*."
            % (run_id, flow_name, current.flow_name)
        )

    if project_name is not None:
        # Project must match...
        if current.get("project_name") != project_name:
            raise RunIdMismatch(
                "The workflow belongs to the project *%s*. "
                "Please use the project decorator or --name to target the correct project"
                % project_name
            )
        # ...and so must the branch.
        if current.get("branch_name") != branch_name:
            raise RunIdMismatch(
                "The workflow belongs to the branch *%s*. "
                "Please use --branch, --production or --name to target the correct branch"
                % branch_name
            )

    # Compare production tokens. The token is intentionally not cached here:
    # operations that need run_id validation can target runs that were not
    # authored from the local environment.
    if authorize is None:
        authorize = load_token(token_prefix)
    elif authorize.startswith("production:"):
        authorize = authorize[11:]

    caller_is_owner = owner == get_username()
    if not caller_is_owner and authorize != token:
        if instructions_fn:
            # NOTE(review): `name` (the execution name) is passed as flow_name
            # here although `flow_name` is available — confirm this is intended.
            instructions_fn(flow_name=name, prev_user=owner)
        raise IncorrectProductionToken("Try again with the correct production token.")

    return True
def _get_existing_workflow_names(obj):
"""
Construct a list of the current workflow name and possible existing deployments of old workflow names
"""
workflows = [obj.workflow_name]
if obj.workflow_name != obj._v1_workflow_name:
# Only add the old name if there exists a deployment with such name.
# This is due to the way validate_token is tied to an existing deployment.
if ArgoWorkflows.get_existing_deployment(obj._v1_workflow_name) is not None:
workflows.append(obj._v1_workflow_name)
return workflows
def sanitize_for_argo(text):
    """
    Strip characters that Argo Workflow resource names do not permit.

    Leading non-alphanumeric characters are removed, every "_", "@" and "+"
    is dropped, and the result is lower-cased. Any run of non-alphanumeric
    characters touching a "." separator is then collapsed into that single
    ".".
    """
    cleaned = re.sub(r"^[^A-Za-z0-9]+", "", text)
    for forbidden in ("_", "@", "+"):
        cleaned = cleaned.replace(forbidden, "")
    cleaned = cleaned.lower()

    # Sanitized and truncated project branch names must still satisfy RFC 1123
    # subdomain rules. After truncation a name can look like
    # project.branch-cut-short-.flowname, so tidy up around the "." separators.
    return re.sub(r"[^a-z0-9]*\.[^a-z0-9]*", ".", cleaned)
def remap_status(status):
    """
    Fold similar Argo Workflow statuses together so the output resembles
    step functions statuses.

    "Error" is reported as "Failed"; every other value is returned unchanged.
    """
    regrouped = {"Error": "Failed"}
    if status in regrouped:
        return regrouped[status]
    return status
================================================
FILE: metaflow/plugins/argo/argo_workflows_decorator.py
================================================
import json
import os
from metaflow import current
from metaflow.decorators import StepDecorator
from metaflow.events import Trigger
from metaflow.metadata_provider import MetaDatum
from metaflow.graph import FlowGraph
from metaflow.flowspec import FlowSpec
from .argo_events import ArgoEvent
class ArgoWorkflowsInternalDecorator(StepDecorator):
    """
    Internal step decorator carrying the Argo Workflows specific task hooks.

    Before a step runs, it exposes event-trigger metadata through `current`
    and registers Argo book-keeping metadata. After a successful step, it
    writes the files under /mnt/out that later tasks and Argo output
    parameters read, and optionally publishes an Argo Event.
    """

    name = "argo_workflows_internal"
    defaults = {"auto-emit-argo-events": True}

    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_user_code_retries,
        ubf_context,
        inputs,
    ):
        """
        Collect trigger payloads from the environment and register metadata.

        Every METAFLOW_ARGO_EVENT_PAYLOAD_* environment variable whose value
        is not "null" contributes one trigger entry. Payloads that are not
        valid JSON, or that decode to something other than a JSON object, are
        treated as unknown events with empty fields.
        """
        self.task_id = task_id
        self.run_id = run_id

        payload_prefix = "METAFLOW_ARGO_EVENT_PAYLOAD_"
        triggers = []
        # Expose event triggering metadata through the current singleton.
        for key, payload in os.environ.items():
            if not key.startswith(payload_prefix):
                continue
            if payload == "null":  # Argo-Workflow's None
                continue
            try:
                payload = json.loads(payload)
            except (TypeError, ValueError):
                # There could be arbitrary events that Metaflow doesn't know of
                payload = {}
            if not isinstance(payload, dict):
                # Valid JSON that is not an object (string, list, number...)
                # has no fields to read; treat it like an unknown event
                # instead of failing on `.get`.
                payload = {}
            triggers.append(
                {
                    "timestamp": payload.get("timestamp"),
                    "id": payload.get("id"),
                    "name": payload.get("name"),  # will exist since filter
                    # infer type from env var key
                    "type": key[len(payload_prefix) :].split("_", 1)[0],
                    # Add more event metadata here in the future
                }
            )

        meta = {}
        if triggers:
            # Enable current.trigger
            current._update_env({"trigger": Trigger(triggers)})
            # There aren't many events yet, so the size of the metadata field
            # is not a concern. Still, this metadata is not needed outside of
            # the start step, so a few bytes are saved in the db.
            if step_name == "start":
                meta["execution-triggers"] = json.dumps(triggers)

        meta["argo-workflow-template"] = os.environ["ARGO_WORKFLOW_TEMPLATE"]
        meta["argo-workflow-name"] = os.environ["ARGO_WORKFLOW_NAME"]
        meta["argo-workflow-namespace"] = os.environ["ARGO_WORKFLOW_NAMESPACE"]
        meta["auto-emit-argo-events"] = self.attributes["auto-emit-argo-events"]
        meta["argo-workflow-template-owner"] = os.environ["METAFLOW_OWNER"]

        entries = [
            MetaDatum(
                field=k, value=v, type=k, tags=["attempt_id:{0}".format(retry_count)]
            )
            for k, v in meta.items()
        ]
        # Register book-keeping metadata for debugging.
        metadata.register_metadata(run_id, step_name, task_id, entries)

    def task_finished(
        self,
        step_name,
        flow: FlowSpec,
        graph: FlowGraph,
        is_task_ok,
        retry_count,
        max_user_code_retries,
    ):
        """
        Persist step outputs under /mnt/out and emit the Argo Event.

        Does nothing when the task failed (`is_task_ok` is falsy).
        """
        if not is_task_ok:
            # The task finished with an exception - execution won't
            # continue so no need to do anything here.
            return

        node = graph[step_name]

        # For `foreach`s, we need to dump the cardinality of the fanout
        # into a file so that Argo Workflows can properly configure
        # the subsequent fanout task via an Output parameter
        #
        # Docker and PNS workflow executors can get output parameters from the base
        # layer (e.g. /tmp), but the Kubelet nor the K8SAPI nor the emissary executors
        # can. It is also unlikely we can get output parameters from the base layer if
        # we run pods with a security context. We work around this constraint by
        # mounting an emptyDir volume.
        if node.type == "foreach":
            if node.parallel_foreach:
                # If a node is marked as a `parallel_foreach`, pass down the value of
                # `num_parallel` to the subsequent steps.
                with open("/mnt/out/num_parallel", "w") as f:
                    json.dump(flow._parallel_ubf_iter.num_parallel, f)
                # Set splits to 1 since parallelism is handled by JobSet.
                flow._foreach_num_splits = 1
                with open("/mnt/out/task_id_entropy", "w") as file:
                    import uuid

                    file.write(uuid.uuid4().hex[:6])

            with open("/mnt/out/splits", "w") as file:
                json.dump(list(range(flow._foreach_num_splits)), file)
            with open("/mnt/out/split_cardinality", "w") as file:
                json.dump(flow._foreach_num_splits, file)

        # For conditional branches we need to record the value of the switch to
        # disk, in order to pass it as an output from the switching step to be
        # used further down the DAG
        if node.type == "split-switch":
            # TODO: A nicer way to access the chosen step?
            _out_funcs, _ = flow._transition
            chosen_step = _out_funcs[0]
            with open("/mnt/out/switch_step", "w") as file:
                file.write(chosen_step)

        # For steps that have a `@parallel` decorator set to them, we will be
        # relying on Jobsets to run the task. In this case, we cannot set
        # anything in the `/mnt/out` directory, since such form of output mounts
        # are not available to Jobset executions.
        if not node.parallel_step:
            # Unfortunately, we can't always use pod names as task-ids since the pod names
            # are not static across retries. We write the task-id to a file that is read
            # by the next task here.
            with open("/mnt/out/task_id", "w") as file:
                file.write(self.task_id)

        # Emit Argo Events given that the flow has succeeded. Given that we only
        # emit events when the task succeeds, we can piggy back on this decorator
        # hook which is guaranteed to execute only after rest of the task has
        # finished execution.
        if self.attributes["auto-emit-argo-events"]:
            # Event name is set to metaflow.project.branch.step so that users can
            # place explicit dependencies on namespaced events. Also, argo events
            # sensors don't allow for filtering against absent fields - which limits
            # our ability to subset non-project namespaced events.
            # TODO: Check length limits for fields in Argo Events
            event = ArgoEvent(
                name="metaflow.%s.%s"
                % (current.get("project_flow_name", flow.name), step_name)
            )
            # There should only be one event generated even when the task is retried.
            # Take care to only add to the list and not modify existing values.
            event.add_to_payload("id", current.pathspec)
            event.add_to_payload("pathspec", current.pathspec)
            event.add_to_payload("flow_name", flow.name)
            event.add_to_payload("run_id", self.run_id)
            event.add_to_payload("step_name", step_name)
            event.add_to_payload("task_id", self.task_id)
            # Add @project decorator related fields. These are used to subset
            # @trigger_on_finish related filters.
            for key in (
                "project_name",
                "branch_name",
                "is_user_branch",
                "is_production",
                "project_flow_name",
            ):
                if current.get(key):
                    event.add_to_payload(key, current.get(key))
            # Add more fields here...
            event.add_to_payload("auto-generated-by-metaflow", True)
            # Keep in mind that any errors raised here will fail the run but the task
            # will still be marked as success. That's why we explicitly swallow any
            # errors and instead print them to std.err.
            event.safe_publish(ignore_errors=True)
================================================
FILE: metaflow/plugins/argo/argo_workflows_deployer.py
================================================
from typing import Any, ClassVar, Dict, Optional, TYPE_CHECKING, Type
from metaflow.runner.deployer_impl import DeployerImpl
if TYPE_CHECKING:
import metaflow.plugins.argo.argo_workflows_deployer_objects
class ArgoWorkflowsDeployer(DeployerImpl):
    """
    Deployer implementation targeting Argo Workflows.

    Parameters
    ----------
    name : str, optional, default None
        Argo workflow name. When omitted, the flow name is used instead.
    """

    TYPE: ClassVar[Optional[str]] = "argo-workflows"

    def __init__(self, deployer_kwargs: Dict[str, str], **kwargs):
        """
        Set up the ArgoWorkflowsDeployer.

        Parameters
        ----------
        deployer_kwargs : Dict[str, str]
            Keyword arguments specific to this deployer.
        **kwargs : Any
            Forwarded unchanged to the superclass constructor.
        """
        # Stored before the superclass constructor runs.
        self._deployer_kwargs = deployer_kwargs
        super().__init__(**kwargs)

    @property
    def deployer_kwargs(self) -> Dict[str, Any]:
        """The deployer-specific keyword arguments given at construction."""
        return self._deployer_kwargs

    @staticmethod
    def deployed_flow_type() -> (
        Type[
            "metaflow.plugins.argo.argo_workflows_deployer_objects.ArgoWorkflowsDeployedFlow"
        ]
    ):
        """Return the `ArgoWorkflowsDeployedFlow` class."""
        # Imported here rather than at module level, like in `create`.
        from .argo_workflows_deployer_objects import (
            ArgoWorkflowsDeployedFlow as deployed_flow_cls,
        )

        return deployed_flow_cls

    def create(
        self, **kwargs
    ) -> "metaflow.plugins.argo.argo_workflows_deployer_objects.ArgoWorkflowsDeployedFlow":
        """
        Create a new ArgoWorkflow deployment.

        Parameters
        ----------
        authorize : str, optional, default None
            Production token to authorize with. Required when re-deploying an
            existing flow for the first time. The token is cached in METAFLOW_HOME.
        generate_new_token : bool, optional, default False
            Generate a new production token for this flow. Moves the production
            flow to a new namespace.
        given_token : str, optional, default None
            Use the given production token for this flow. Moves the production
            flow to the given namespace.
        tags : List[str], optional, default None
            Tags used to annotate all objects produced by Argo Workflows runs.
        user_namespace : str, optional, default None
            Change the namespace from the default (production token) to the given tag.
        only_json : bool, optional, default False
            Only print out the JSON sent to Argo Workflows without deploying anything.
        max_workers : int, optional, default 100
            Maximum number of parallel processes.
        workflow_timeout : int, optional, default None
            Workflow timeout in seconds.
        workflow_priority : int, optional, default None
            Workflow priority as an integer. Higher priority workflows are
            processed first if the Argo Workflows controller is configured to
            process limited parallel workflows.
        auto_emit_argo_events : bool, optional, default True
            Auto emit Argo Events when the run completes successfully.
        notify_on_error : bool, optional, default False
            Notify if the workflow fails.
        notify_on_success : bool, optional, default False
            Notify if the workflow succeeds.
        notify_slack_webhook_url : str, optional, default ''
            Slack incoming webhook url for workflow success/failure notifications.
        notify_pager_duty_integration_key : str, optional, default ''
            PagerDuty Events API V2 Integration key for workflow success/failure
            notifications.
        enable_heartbeat_daemon : bool, optional, default False
            Use a daemon container to broadcast heartbeats.
        deployer_attribute_file : str, optional, default None
            Write the workflow name to the specified file. Used internally for
            Metaflow's Deployer API.
        enable_error_msg_capture : bool, optional, default True
            Capture stack trace of first failed task in exit hook.

        Returns
        -------
        ArgoWorkflowsDeployedFlow
            The Flow deployed to Argo Workflows.
        """
        # Local import prevents a circular import.
        from .argo_workflows_deployer_objects import (
            ArgoWorkflowsDeployedFlow as deployed_flow_cls,
        )

        return self._create(deployed_flow_cls, **kwargs)
# NOTE(review): presumably names extra modules for the stub generator to
# process alongside this one — confirm against the stubgen tooling.
_addl_stubgen_modules = ["metaflow.plugins.argo.argo_workflows_deployer_objects"]
================================================
FILE: metaflow/plugins/argo/argo_workflows_deployer_objects.py
================================================
import sys
import json
import time
import tempfile
from typing import ClassVar, Optional
from metaflow.client.core import get_metadata
from metaflow.exception import MetaflowException
from metaflow.plugins.argo.argo_client import ArgoClient
from metaflow.metaflow_config import KUBERNETES_NAMESPACE
from metaflow.plugins.argo.argo_workflows import ArgoWorkflows
from metaflow.runner.deployer import (
Deployer,
DeployedFlow,
TriggeredRun,
generate_fake_flow_file_contents,
)
from metaflow.runner.utils import get_lower_level_group, handle_timeout, temporary_fifo
class ArgoWorkflowsTriggeredRun(TriggeredRun):
    """
    A class representing a triggered Argo Workflow execution.
    """

    def _run_lifecycle_command(self, command_name: str, **kwargs) -> bool:
        """
        Build the `command_name` CLI command for this run, execute it in a
        subprocess and report whether it exited with return code 0.
        """
        _, run_id = self.pathspec.split("/")

        # every subclass needs to have `self.deployer_kwargs`
        group = get_lower_level_group(
            self.deployer.api,
            self.deployer.top_level_kwargs,
            self.deployer.TYPE,
            self.deployer.deployer_kwargs,
        )
        command = getattr(group, command_name)(run_id=run_id, **kwargs)

        pid = self.deployer.spm.run_command(
            [sys.executable, *command],
            env=self.deployer.env_vars,
            cwd=self.deployer.cwd,
            show_output=self.deployer.show_output,
        )

        command_obj = self.deployer.spm.get(pid)
        command_obj.sync_wait()
        return command_obj.process.returncode == 0

    def suspend(self, **kwargs) -> bool:
        """
        Suspend the running workflow.

        Parameters
        ----------
        authorize : str, optional, default None
            Authorize the suspension with a production token.

        Returns
        -------
        bool
            True if the command was successful, False otherwise.
        """
        return self._run_lifecycle_command("suspend", **kwargs)

    def unsuspend(self, **kwargs) -> bool:
        """
        Unsuspend the suspended workflow.

        Parameters
        ----------
        authorize : str, optional, default None
            Authorize the unsuspend with a production token.

        Returns
        -------
        bool
            True if the command was successful, False otherwise.
        """
        return self._run_lifecycle_command("unsuspend", **kwargs)

    def terminate(self, **kwargs) -> bool:
        """
        Terminate the running workflow.

        Parameters
        ----------
        authorize : str, optional, default None
            Authorize the termination with a production token.

        Returns
        -------
        bool
            True if the command was successful, False otherwise.
        """
        return self._run_lifecycle_command("terminate", **kwargs)

    def wait_for_completion(
        self, check_interval: int = 5, timeout: Optional[int] = None
    ):
        """
        Wait for the workflow to complete or timeout.

        Parameters
        ----------
        check_interval: int, default: 5
            Frequency of checking for workflow completion, in seconds.
        timeout : int, optional, default None
            Maximum time in seconds to wait for workflow completion.
            If None, waits indefinitely.

        Raises
        ------
        TimeoutError
            If the workflow does not complete within the specified timeout period.
        """
        start_time = time.time()
        while self.is_running:
            if timeout is not None and (time.time() - start_time) > timeout:
                raise TimeoutError(
                    "Workflow did not complete within specified timeout."
                )
            time.sleep(check_interval)

    @property
    def is_running(self):
        """
        Check if the workflow is currently running.

        Returns
        -------
        bool
            True if the workflow status is either 'Pending' or 'Running',
            False otherwise.
        """
        # full list of all states present here:
        # https://github.com/argoproj/argo-workflows/blob/main/pkg/apis/workflow/v1alpha1/workflow_types.go#L54
        # Only non-terminal states mean the workflow has not finished. A
        # missing status (None) is not in the tuple, so it yields False.
        return self.status in ("Pending", "Running")

    @property
    def status(self) -> Optional[str]:
        """
        Get the status of the triggered run.

        Returns
        -------
        str, optional
            The status of the workflow considering the run object, or None if
            the status could not be retrieved.
        """
        from metaflow.plugins.argo.argo_workflows_cli import (
            get_status_considering_run_object,
        )

        flow_name, run_id = self.pathspec.split("/")
        # Argo knows the execution by the run id without its "argo-" prefix.
        name = run_id[5:]
        status = ArgoWorkflows.get_workflow_status(flow_name, name)
        if status is not None:
            return get_status_considering_run_object(status, self.run)
        return None
class ArgoWorkflowsDeployedFlow(DeployedFlow):
    """
    A class representing a deployed Argo Workflow template.
    """

    TYPE: ClassVar[Optional[str]] = "argo-workflows"

    @classmethod
    def list_deployed_flows(cls, flow_name: Optional[str] = None):
        """
        List all deployed Argo Workflow templates.

        Parameters
        ----------
        flow_name : str, optional, default None
            If specified, only list deployed flows for this specific flow name.
            If None, list all deployed flows.

        Yields
        ------
        ArgoWorkflowsDeployedFlow
            `ArgoWorkflowsDeployedFlow` objects representing deployed
            workflow templates on Argo Workflows.
        """
        from metaflow.plugins.argo.argo_workflows import ArgoWorkflows

        # No flow name means "every template" (all=True); a flow name narrows
        # the listing to that flow (all=False).
        template_names = ArgoWorkflows.list_templates(
            flow_name=flow_name, all=flow_name is None
        )
        for template_name in template_names:
            try:
                yield cls.from_deployment(template_name)
            except Exception:
                # Templates that can't be turned into DeployedFlow objects
                # are skipped.
                continue

    @classmethod
    def from_deployment(cls, identifier: str, metadata: Optional[str] = None):
        """
        Retrieves a `ArgoWorkflowsDeployedFlow` object from an identifier and optional
        metadata.

        Parameters
        ----------
        identifier : str
            Deployer specific identifier for the workflow to retrieve
        metadata : str, optional, default None
            Optional deployer specific metadata.

        Returns
        -------
        ArgoWorkflowsDeployedFlow
            A `ArgoWorkflowsDeployedFlow` object representing the
            deployed flow on argo workflows.

        Raises
        ------
        MetaflowException
            If no workflow template exists for `identifier`.
        """
        template = ArgoClient(namespace=KUBERNETES_NAMESPACE).get_workflow_template(
            identifier
        )
        if template is None:
            raise MetaflowException("No deployed flow found for: %s" % identifier)

        annotations = template.get("metadata", {}).get("annotations", {})

        flow_name = annotations.get("metaflow/flow_name", "")
        username = annotations.get("metaflow/owner", "")
        parameters = json.loads(annotations.get("metaflow/parameters", "{}"))

        # These two only exist if the @project decorator is used.
        branch_name = annotations.get("metaflow/branch_name", None)
        project_name = annotations.get("metaflow/project_name", None)

        project_kwargs = {}
        if branch_name is not None:
            if branch_name == "prod" or branch_name.startswith("prod."):
                project_kwargs["production"] = True
            if branch_name.startswith(("prod.", "test.")):
                # Everything after the first "." is the branch itself.
                project_kwargs["branch"] = branch_name.partition(".")[2]

        flow_source = generate_fake_flow_file_contents(
            flow_name=flow_name, param_info=parameters, project_name=project_name
        )

        # delete=False: the file is left on disk after the handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as fake_flow_file:
            flow_path = fake_flow_file.name
            with open(flow_path, "w") as fp:
                fp.write(flow_source)

            deployer_env = {"METAFLOW_USER": username}
            if branch_name is None:
                d = Deployer(flow_path, env=deployer_env).argo_workflows(
                    name=identifier
                )
            else:
                d = Deployer(
                    flow_path, env=deployer_env, **project_kwargs
                ).argo_workflows()

            d.name = identifier
            d.flow_name = flow_name
            d.metadata = get_metadata() if metadata is None else metadata

        return cls(deployer=d)

    @classmethod
    def get_triggered_run(
        cls, identifier: str, run_id: str, metadata: Optional[str] = None
    ):
        """
        Retrieves a `ArgoWorkflowsTriggeredRun` object from an identifier, a run id and
        optional metadata.

        Parameters
        ----------
        identifier : str
            Deployer specific identifier for the workflow to retrieve
        run_id : str
            Run ID for the which to fetch the triggered run object
        metadata : str, optional, default None
            Optional deployer specific metadata.

        Returns
        -------
        ArgoWorkflowsTriggeredRun
            A `ArgoWorkflowsTriggeredRun` object representing the
            triggered run on argo workflows.
        """
        deployer = cls.from_deployment(identifier, metadata).deployer
        run_description = {
            "metadata": deployer.metadata,
            "pathspec": "/".join((deployer.flow_name, run_id)),
            "name": run_id,
        }
        return ArgoWorkflowsTriggeredRun(
            deployer=deployer, content=json.dumps(run_description)
        )

    @property
    def production_token(self) -> Optional[str]:
        """
        Get the production token for the deployed flow.

        Returns
        -------
        str, optional
            The production token, None if it cannot be retrieved.
        """
        try:
            _, token = ArgoWorkflows.get_existing_deployment(self.deployer.name)
        except TypeError:
            # Raised, for instance, when there is nothing to unpack.
            return None
        return token

    def delete(self, **kwargs) -> bool:
        """
        Delete the deployed workflow template.

        Parameters
        ----------
        authorize : str, optional, default None
            Authorize the deletion with a production token.

        Returns
        -------
        bool
            True if the command was successful, False otherwise.
        """
        deployer = self.deployer
        cli_group = get_lower_level_group(
            deployer.api,
            deployer.top_level_kwargs,
            deployer.TYPE,
            deployer.deployer_kwargs,
        )
        command = cli_group.delete(**kwargs)

        pid = deployer.spm.run_command(
            [sys.executable, *command],
            env=deployer.env_vars,
            cwd=deployer.cwd,
            show_output=deployer.show_output,
        )

        finished = deployer.spm.get(pid)
        finished.sync_wait()
        return finished.process.returncode == 0

    def trigger(self, **kwargs) -> ArgoWorkflowsTriggeredRun:
        """
        Trigger a new run for the deployed flow.

        Parameters
        ----------
        **kwargs : Any
            Additional arguments to pass to the trigger command,
            `Parameters` in particular.

        Returns
        -------
        ArgoWorkflowsTriggeredRun
            The triggered run instance.

        Raises
        ------
        Exception
            If there is an error during the trigger process.
        """
        deployer = self.deployer
        with temporary_fifo() as (attribute_file_path, attribute_file_fd):
            # every subclass needs to have `self.deployer_kwargs`
            cli_group = get_lower_level_group(
                deployer.api,
                deployer.top_level_kwargs,
                deployer.TYPE,
                deployer.deployer_kwargs,
            )
            command = cli_group.trigger(
                deployer_attribute_file=attribute_file_path, **kwargs
            )

            pid = deployer.spm.run_command(
                [sys.executable, *command],
                env=deployer.env_vars,
                cwd=deployer.cwd,
                show_output=deployer.show_output,
            )

            command_obj = deployer.spm.get(pid)
            # The subprocess reports the triggered run through the FIFO.
            content = handle_timeout(
                attribute_file_fd, command_obj, deployer.file_read_timeout
            )
            command_obj.sync_wait()
            succeeded = command_obj.process.returncode == 0
            if succeeded:
                return ArgoWorkflowsTriggeredRun(deployer=deployer, content=content)

        raise Exception(
            "Error triggering %s on %s for %s"
            % (
                deployer.name,
                deployer.TYPE,
                deployer.flow_file,
            )
        )
================================================
FILE: metaflow/plugins/argo/capture_error.py
================================================
import json
import os
from datetime import datetime, timezone
###
# Algorithm to determine 1st error:
# ignore the failures where message = ""
# group the failures via templateName
# sort each group by finishedAt
# find the group for which the last finishedAt is earliest
# if the earliest message is "No more retries left" then
# get the n-1th message from that group
# else
# return the last message.
###
def parse_workflow_failures():
    """
    Read the workflow failures from METAFLOW_ARGO_WORKFLOW_FAILURES.

    The variable normally holds a JSON document that is itself JSON-encoded
    as a string, so it is decoded twice. The second decode only happens when
    the first one yields a string. This keeps the unset default ("[]") and a
    singly-encoded value working instead of raising TypeError. A blank value
    is treated as "no failures".

    Returns
    -------
    list
        The failure entries that have a non-empty "message".
    """
    raw = os.getenv("METAFLOW_ARGO_WORKFLOW_FAILURES", "[]")
    if not raw.strip():
        return []

    # strict=False lets control characters (e.g. raw newlines from stack
    # traces) appear inside JSON strings.
    failures = json.loads(raw, strict=False)
    if isinstance(failures, str):
        failures = json.loads(failures, strict=False)

    return [wf for wf in (failures or []) if wf.get("message")]
def group_failures_by_template(failures):
    """
    Bucket failure entries by their "templateName".

    Entries whose "finishedAt" is missing or None are stamped, in place,
    with the current UTC time so that they can be ordered later on.

    Returns a dict mapping template name -> list of failures, preserving
    the input order within each list.
    """
    by_template = {}
    for entry in failures:
        if entry.get("finishedAt") is None:
            entry["finishedAt"] = datetime.now(timezone.utc).strftime(
                "%Y-%m-%dT%H:%M:%SZ"
            )
        template_name = entry["templateName"]
        if template_name not in by_template:
            by_template[template_name] = []
        by_template[template_name].append(entry)
    return by_template
def sort_by_finished_at(items):
    """Return a new list of `items` ordered by their "finishedAt" timestamp."""

    def _finished_at(item):
        return datetime.strptime(item["finishedAt"], "%Y-%m-%dT%H:%M:%SZ")

    ordered = list(items)
    ordered.sort(key=_finished_at)
    return ordered
def find_earliest_last_finished_group(groups):
    """
    Return the key of the group whose final entry has the earliest
    "finishedAt" timestamp.

    Only the last element of each group is inspected, so callers are
    expected to have ordered every group already.
    """

    def _last_finished_at(group_name):
        last_entry = groups[group_name][-1]
        return datetime.strptime(last_entry["finishedAt"], "%Y-%m-%dT%H:%M:%SZ")

    return min(groups, key=_last_finished_at)
def determine_first_error():
    """
    Pick the failure that most likely caused the workflow to fail.

    Failures are grouped by template and each group is ordered by
    "finishedAt". The group whose last failure finished earliest is
    selected. If that last failure is the generic "No more retries left"
    marker, the failure just before it is returned instead -- unless the
    marker is the only entry of the group, in which case the marker itself
    is returned (a negative index on a one-element list wraps around to that
    same entry, so the explicit length check only makes this visible).

    Returns
    -------
    dict or None
        The selected failure entry, or None when there are no failures.
    """
    failures = parse_workflow_failures()
    if not failures:
        return None

    grouped_failures = group_failures_by_template(failures)
    for group in grouped_failures.values():
        group.sort(
            key=lambda g: datetime.strptime(g["finishedAt"], "%Y-%m-%dT%H:%M:%SZ")
        )

    earliest_group = grouped_failures[
        find_earliest_last_finished_group(grouped_failures)
    ]

    last_failure = earliest_group[-1]
    # Only step back to the previous failure when there actually is one.
    if last_failure["message"] == "No more retries left" and len(earliest_group) > 1:
        return earliest_group[-2]
    return last_failure
if __name__ == "__main__":
    # Print the first error as indented JSON.
    first_err = determine_first_error()
    serialized = json.dumps(first_err, indent=2)
    print(serialized)
================================================
FILE: metaflow/plugins/argo/conditional_input_paths.py
================================================
from math import inf
import sys
from metaflow.util import decompress_list, compress_list
import base64
def generate_input_paths(input_paths, skippable_steps):
    """
    Narrow down the compressed list of input paths for a step that follows
    a conditional.

    `input_paths` is the compressed pathspec list (=> run_id/step/:foo,bar),
    usually base64 encoded. `skippable_steps` is a list of step names;
    pathspecs containing any of them are dropped when an alternative exists.

    Returns the selected pathspecs as a compressed list.
    """
    # input_paths are base64 encoded due to Argo shenanigans
    try:
        decoded = base64.b64decode(input_paths).decode("utf-8")
    except Exception:
        # depending on graph structure, input_paths might not be base64 encoded inside foreach tasks.
        decoded = input_paths

    # Paths of steps that never executed (per the conditional) still contain
    # unresolved "{{...}}" placeholders; drop those.
    executed = [path for path in decompress_list(decoded) if "{{" not in path]

    # Keep the pathspecs that do not mention any skippable step,
    # e.g. the case of skipping switches: start -> case_step -> conditional_a or end
    preferred = []
    for path in executed:
        mentions_skippable = any(step in path for step in skippable_steps)
        if not mentions_skippable:
            preferred.append(path)

    if preferred:
        pathspecs_to_use = preferred
    else:
        # The order of graph parsing ensures that the steps are in reverse
        # order of occurrence, so the first remaining path is the latest one.
        pathspecs_to_use = executed[:1]

    return compress_list(pathspecs_to_use, zlibmin=inf)
if __name__ == "__main__":
    # argv[1]: compressed input paths; argv[2] (optional): comma-separated
    # names of the skippable steps.
    input_paths = sys.argv[1]
    skippable_steps = sys.argv[2].split(",") if len(sys.argv) > 2 else []
    print(generate_input_paths(input_paths, skippable_steps))
================================================
FILE: metaflow/plugins/argo/exit_hooks.py
================================================
from collections import defaultdict
import json
from typing import Dict, List, Optional
class JsonSerializable(object):
    """Mixin for builder objects that keep their JSON content in `self.payload`."""

    def to_json(self):
        """Return the payload object itself (not a copy)."""
        return self.payload

    def __str__(self):
        """Render the payload as JSON indented by four spaces."""
        rendered = json.dumps(self.payload, indent=4)
        return rendered
class _LifecycleHook(JsonSerializable):
    """
    Builder for an Argo Workflows LifecycleHook.

    https://argoproj.github.io/argo-workflows/fields/#lifecyclehook
    """

    def __init__(self, name):
        def _nested():
            # Dictionary whose missing keys are created as further nested
            # dictionaries of the same kind.
            return defaultdict(_nested)

        self.name = name
        self.payload = _nested()

    def expression(self, expression):
        """Set the hook's `expression` (stored as a string); returns self."""
        self.payload["expression"] = str(expression)
        return self

    def template(self, template):
        """Set the name of the template the hook runs; returns self."""
        self.payload["template"] = template
        return self
class _Template(JsonSerializable):
    """
    Builder for an Argo Workflows Template.

    https://argoproj.github.io/argo-workflows/fields/#template
    """

    def __init__(self, name):
        def _nested():
            # Dictionary whose missing keys are created as further nested
            # dictionaries of the same kind.
            return defaultdict(_nested)

        self.name = name
        payload = _nested()
        payload["name"] = name
        self.payload = payload

    def http(self, http):
        """Attach an HTTP spec (an object exposing `to_json()`); returns self."""
        self.payload["http"] = http.to_json()
        return self

    def script(self, script):
        """Attach a script spec (an object exposing `to_json()`); returns self."""
        self.payload["script"] = script.to_json()
        return self

    def container(self, container):
        """Attach a container definition, stored as given; returns self."""
        self.payload["container"] = container
        return self

    def service_account_name(self, service_account_name):
        """Set `serviceAccountName` on the template; returns self."""
        self.payload["serviceAccountName"] = service_account_name
        return self
class Hook(object):
"""
Abstraction for Argo Workflows exit hooks.
A hook consists of a Template, and one or more LifecycleHooks that trigger the template.

This base class only declares the two attributes; the subclasses in this
module populate them in their `__init__`.
"""
# The template that is executed when one of the lifecycle hooks fires.
template: "_Template"
# The lifecycle hooks that refer to `template` by name.
lifecycle_hooks: List["_LifecycleHook"]
class _HttpSpec(JsonSerializable):
    """
    Builder for the `http` section of an Argo Workflows template.

    https://argoproj.github.io/argo-workflows/fields/#http
    """

    def __init__(self, method):
        def _nested():
            # Dictionary whose missing keys are created as further nested
            # dictionaries of the same kind.
            return defaultdict(_nested)

        spec = _nested()
        spec["method"] = method
        spec["headers"] = []
        self.payload = spec

    def header(self, header, value):
        """Append one request header as a name/value pair; returns self."""
        entry = {"name": header, "value": value}
        self.payload["headers"].append(entry)
        return self

    def body(self, body):
        """Set the request body (stored as a string); returns self."""
        self.payload["body"] = str(body)
        return self

    def url(self, url):
        """Set the request URL; returns self."""
        self.payload["url"] = url
        return self

    def success_condition(self, success_condition):
        """Set `successCondition`; returns self."""
        self.payload["successCondition"] = success_condition
        return self
# HTTP hook
class HttpExitHook(Hook):
    """
    Exit hook that issues an HTTP request.

    With `on_success` the hook fires only when the workflow succeeded, with
    `on_error` only when it errored or failed; with neither flag the hook
    carries no expression. Setting both flags raises an Exception.
    """

    def __init__(
        self,
        name: str,
        url: str,
        method: str = "GET",
        headers: Optional[Dict] = None,
        body: Optional[str] = None,
        on_success: bool = False,
        on_error: bool = False,
    ):
        self.template = _Template(name)

        request = _HttpSpec(method).url(url)
        if headers is not None:
            for header_name, header_value in headers.items():
                request.header(header_name, header_value)
        if body is not None:
            request.body(body)
        self.template.http(request)

        self.lifecycle_hooks = []

        if on_success and on_error:
            raise Exception("Set only one of the on_success/on_error at a time.")

        # At most one of the flags is set from here on.
        if on_success:
            condition = "workflow.status == 'Succeeded'"
        elif on_error:
            condition = "workflow.status == 'Error' || workflow.status == 'Failed'"
        else:
            condition = None

        if condition is None:
            # add an expressionless lifecycle hook
            hook = _LifecycleHook(name).template(name)
        else:
            hook = (
                _LifecycleHook(name).expression(condition).template(self.template.name)
            )
        self.lifecycle_hooks.append(hook)
class ExitHookHack(Hook):
    """
    Effectively no-op HTTP exit hook.

    Warning: terrible hack to workaround a bug in Argo Workflow where the
    other templates do not execute unless there is an explicit exit hook.
    As and when this bug is patched, this template should be removed.

    Note: an Http template is used because changing this to an actual no-op
    container had the side-effect of leaving LifecycleHooks in a pending
    state even when they have finished execution.
    """

    def __init__(
        self,
        url,
        headers=None,
        body=None,
    ):
        template_name = "exit-hook-hack"
        self.template = _Template(template_name)

        request = _HttpSpec("GET").url(url)
        if headers is not None:
            for header_name, header_value in headers.items():
                request.header(header_name, header_value)
        if body is not None:
            # the body is serialized to JSON before being attached
            request.body(json.dumps(body))
        # a condition that always holds
        request.success_condition("true == true")
        self.template.http(request)

        # add an expressionless lifecycle hook
        self.lifecycle_hooks = []
        self.lifecycle_hooks.append(_LifecycleHook("exit").template(template_name))
class ContainerHook(Hook):
    """
    Exit hook that runs a container.

    With `on_success` the hook fires only when the workflow succeeded, with
    `on_error` only when it errored or failed; with neither flag the hook
    carries no expression. Setting both flags raises an Exception.
    """

    def __init__(
        self,
        name: str,
        container: Dict,
        service_account_name: str = None,
        on_success: bool = False,
        on_error: bool = False,
    ):
        self.template = _Template(name)
        if service_account_name is not None:
            self.template.service_account_name(service_account_name)
        self.template.container(container)

        self.lifecycle_hooks = []

        if on_success and on_error:
            raise Exception("Set only one of the on_success/on_error at a time.")

        # At most one of the flags is set from here on.
        if on_success:
            condition = "workflow.status == 'Succeeded'"
        elif on_error:
            condition = "workflow.status == 'Error' || workflow.status == 'Failed'"
        else:
            condition = None

        if condition is None:
            # add an expressionless lifecycle hook
            hook = _LifecycleHook(name).template(name)
        else:
            hook = (
                _LifecycleHook(name).expression(condition).template(self.template.name)
            )
        self.lifecycle_hooks.append(hook)
================================================
FILE: metaflow/plugins/argo/generate_input_paths.py
================================================
import sys
from hashlib import md5
def generate_input_paths(step_name, timestamp, input_paths, split_cardinality):
    """
    Build the input-paths string for the tasks of a foreach split.

    One task id is derived per split index from the step name, the timestamp
    and the incoming input paths. The run id is the part of `input_paths`
    before the first "/".

    Returns a string shaped like: run_id/step/:foo,bar
    """
    run_id, _, _ = input_paths.partition("/")
    foreach_base_id = "{}-{}-{}".format(step_name, timestamp, input_paths)
    task_ids = []
    for index in range(int(split_cardinality)):
        task_ids.append(_generate_task_id(foreach_base_id, index))
    return "{}/{}/:{}".format(run_id, step_name, ",".join(task_ids))
def _generate_task_id(base, idx):
# For foreach splits generate the expected input-paths based on split_cardinality and base_id.
# newline required at the end due to 'echo' appending one in the shell side task_id creation.
task_str = "%s-%s\n" % (base, idx)
hash = md5(task_str.encode("utf-8")).hexdigest()[-8:]
return "t-" + hash
if __name__ == "__main__":
    # Positional arguments: step name, timestamp, input paths, split cardinality.
    result = generate_input_paths(
        sys.argv[1],
        sys.argv[2],
        sys.argv[3],
        sys.argv[4],
    )
    print(result)
================================================
FILE: metaflow/plugins/argo/jobset_input_paths.py
================================================
import sys
def generate_input_paths(run_id, step_name, task_id_entropy, num_parallel):
    """
    Build the input-paths string for a parallel (jobset) step.

    The list always starts with one control task id, followed by
    `num_parallel - 1` worker task ids numbered from 0.

    Returns a string shaped like: run_id/step/:foo,bar
    """
    worker_count = int(num_parallel) - 1
    task_ids = ["control-{}-0".format(task_id_entropy)]
    for worker_index in range(worker_count):
        task_ids.append("worker-{}-{}".format(task_id_entropy, worker_index))
    return "{}/{}/:{}".format(run_id, step_name, ",".join(task_ids))
if __name__ == "__main__":
    # Positional arguments: run id, step name, task id entropy, num_parallel.
    result = generate_input_paths(
        sys.argv[1],
        sys.argv[2],
        sys.argv[3],
        sys.argv[4],
    )
    print(result)
================================================
FILE: metaflow/plugins/argo/param_val.py
================================================
import sys
import base64
import json
def parse_parameter_value(base64_value):
    """
    Decode a base64 encoded parameter value.

    The decoded UTF-8 text is parsed as JSON when possible; text that is
    not valid JSON is returned unchanged.
    """
    decoded = base64.b64decode(base64_value).decode("utf-8")
    try:
        parsed = json.loads(decoded)
    except json.JSONDecodeError:
        # fallback to using the original value.
        return decoded
    return parsed
if __name__ == "__main__":
    # argv[1] is the base64 encoded parameter value.
    base64_val = sys.argv[1]
    decoded_value = parse_parameter_value(base64_val)
    print(decoded_value)
================================================
FILE: metaflow/plugins/aws/__init__.py
================================================
================================================
FILE: metaflow/plugins/aws/aws_client.py
================================================
# Module-level cache of the sandbox credentials; filled by
# Boto3ClientProvider.get_client with the JSON returned by the sandbox
# auth endpoint the first time they are needed.
cached_aws_sandbox_creds = None
# Module-level cache of the provider class selected by get_aws_client.
cached_provider_class = None
class Boto3ClientProvider(object):
# Name that get_aws_client compares against DEFAULT_AWS_CLIENT_PROVIDER.
name = "boto3"
@staticmethod
def get_client(
module, with_error=False, role_arn=None, session_vars=None, client_params=None
):
"""
Create a boto3 client for the AWS service `module`.

Parameters
----------
module : str
    Service name handed to `Session.client` (e.g. "s3").
with_error : bool
    When true, return a `(client, ClientError)` tuple instead of only
    the client.
role_arn : str, optional
    Role to assume for the client. Ignored when the AWS sandbox is enabled.
session_vars : dict, optional
    Passed as `session_vars` to the botocore session that is created
    when `role_arn` is set.
client_params : dict, optional
    Extra keyword arguments for `Session.client`. A "config" entry given
    as a dictionary is converted to a botocore `Config`.

Raises
------
MetaflowException
    If boto3 cannot be imported, or if fetching sandbox credentials
    fails with an HTTP error.
"""
from metaflow.exception import MetaflowException
from metaflow.metaflow_config import (
AWS_SANDBOX_ENABLED,
AWS_SANDBOX_STS_ENDPOINT_URL,
AWS_SANDBOX_API_KEY,
S3_CLIENT_RETRY_CONFIG,
)
if session_vars is None:
session_vars = {}
if client_params is None:
client_params = {}
import requests
try:
import boto3
import botocore
from botocore.exceptions import ClientError
from botocore.config import Config
except (NameError, ImportError):
raise MetaflowException(
"Could not import module 'boto3'. Install boto3 first."
)
# Convert dictionary config to Config object if needed
# NOTE(review): this writes into the `client_params` dictionary supplied by
# the caller.
if "config" in client_params and not isinstance(
client_params["config"], Config
):
client_params["config"] = Config(**client_params["config"])
# For S3, apply the default retry configuration unless a config with
# retries is already present.
if module == "s3" and (
"config" not in client_params or client_params["config"].retries is None
):
# do not set anything if the user has already set something
config = client_params.get("config", Config())
config.retries = S3_CLIENT_RETRY_CONFIG
client_params["config"] = config
if AWS_SANDBOX_ENABLED:
# role is ignored in the sandbox
global cached_aws_sandbox_creds
if cached_aws_sandbox_creds is None:
# authenticate using STS
url = "%s/auth/token" % AWS_SANDBOX_STS_ENDPOINT_URL
headers = {"x-api-key": AWS_SANDBOX_API_KEY}
try:
# NOTE(review): no timeout is passed here, and only HTTPError is
# translated into a MetaflowException; other request errors
# propagate as they are.
r = requests.get(url, headers=headers)
r.raise_for_status()
cached_aws_sandbox_creds = r.json()
except requests.exceptions.HTTPError as e:
raise MetaflowException(repr(e))
# The cached JSON is used as keyword arguments for the session.
if with_error:
return (
boto3.session.Session(**cached_aws_sandbox_creds).client(
module, **client_params
),
ClientError,
)
return boto3.session.Session(**cached_aws_sandbox_creds).client(
module, **client_params
)
session = boto3.session.Session()
if role_arn:
# Build credentials that assume `role_arn`, using the default session's
# own credentials as the source.
fetcher = botocore.credentials.AssumeRoleCredentialFetcher(
client_creator=session._session.create_client,
source_credentials=session._session.get_credentials(),
role_arn=role_arn,
extra_args={},
)
creds = botocore.credentials.DeferredRefreshableCredentials(
method="assume-role", refresh_using=fetcher.fetch_credentials
)
# The credentials are installed through the botocore session's private
# `_credentials` attribute, and the boto3 session is rebuilt on top of it.
botocore_session = botocore.session.Session(session_vars=session_vars)
botocore_session._credentials = creds
session = boto3.session.Session(botocore_session=botocore_session)
if with_error:
return session.client(module, **client_params), ClientError
return session.client(module, **client_params)
def get_aws_client(
    module, with_error=False, role_arn=None, session_vars=None, client_params=None
):
    """
    Return an AWS client for `module` from the configured client provider.

    The provider class named by DEFAULT_AWS_CLIENT_PROVIDER is looked up
    among AWS_CLIENT_PROVIDERS on first use and cached in the module-level
    `cached_provider_class`. All arguments are forwarded to the provider's
    `get_client`.

    Raises
    ------
    ValueError
        If no registered provider has the configured name.
    """
    global cached_provider_class
    if cached_provider_class is None:
        from metaflow.metaflow_config import DEFAULT_AWS_CLIENT_PROVIDER
        from metaflow.plugins import AWS_CLIENT_PROVIDERS

        selected = next(
            (
                provider
                for provider in AWS_CLIENT_PROVIDERS
                if provider.name == DEFAULT_AWS_CLIENT_PROVIDER
            ),
            None,
        )
        if selected is None:
            raise ValueError(
                "Cannot find AWS Client provider %s" % DEFAULT_AWS_CLIENT_PROVIDER
            )
        cached_provider_class = selected

    provider_class = cached_provider_class
    return provider_class.get_client(
        module,
        with_error,
        role_arn=role_arn,
        session_vars=session_vars,
        client_params=client_params,
    )
================================================
FILE: metaflow/plugins/aws/aws_utils.py
================================================
import re
from metaflow.exception import MetaflowException
def parse_s3_full_path(s3_uri):
    """
    Split an S3 URI into `(bucket, path)`.

    The path is returned without leading or trailing slashes, or as None
    when nothing is left after stripping them. The scheme is checked with
    an `assert`, so a non-"s3" URI raises AssertionError.
    """
    from urllib.parse import urlparse

    # URL layout: scheme://netloc/path;params?query#fragment
    parsed = urlparse(s3_uri)
    assert parsed.scheme == "s3"
    assert parsed.netloc is not None

    key = parsed.path.strip("/")
    return parsed.netloc, (key if key != "" else None)
def get_ec2_instance_metadata():
    """
    Fetches the EC2 instance metadata through AWS instance metadata service

    Returns either an empty dictionary, or one with the keys
    - ec2-instance-id
    - ec2-instance-type
    - ec2-region
    - ec2-availability-zone

    The lookup is best-effort: any error raised while talking to the
    metadata service results in an empty dictionary. Interpreter-level
    signals such as KeyboardInterrupt and SystemExit are not swallowed.
    """
    # TODO: Remove dependency on requests
    import requests

    meta = {}
    # Capture AWS instance identity metadata. This is best-effort only since
    # access to this end-point might be blocked on AWS and not available
    # for non-AWS deployments.
    # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html
    # Keep the timeout short, as the communication is happening in the same subnet,
    # there should not be any significant delay in the response.
    # Having a long default timeout here introduces unnecessary delay in launching tasks when the
    # instance is unreachable.
    # requests interprets the tuple as (connect timeout, read timeout) in seconds.
    timeout = (1, 10)
    token = None
    try:
        # Try to get an IMDSv2 token.
        token = requests.put(
            url="http://169.254.169.254/latest/api/token",
            headers={"X-aws-ec2-metadata-token-ttl-seconds": "100"},
            timeout=timeout,
        ).text
    except Exception:
        # Best effort: without a token we fall back to IMDSv1 below.
        # `Exception` (rather than a bare except) lets Ctrl-C / SystemExit
        # through instead of silently continuing.
        pass
    try:
        headers = {}
        # Add IMDSv2 token if available, else fall back to IMDSv1.
        if token:
            headers["X-aws-ec2-metadata-token"] = token
        instance_meta = requests.get(
            url="http://169.254.169.254/latest/dynamic/instance-identity/document",
            headers=headers,
            timeout=timeout,
        ).json()
        meta["ec2-instance-id"] = instance_meta.get("instanceId")
        meta["ec2-instance-type"] = instance_meta.get("instanceType")
        meta["ec2-region"] = instance_meta.get("region")
        meta["ec2-availability-zone"] = instance_meta.get("availabilityZone")
    except Exception:
        # Best effort: not running on EC2, endpoint blocked, or bad response.
        pass
    return meta
def get_docker_registry(image_uri):
    """
    Extract the registry part of a container image URI.

    The URI is matched against a pattern with three groups:

      1. REGISTRY (optional)     - at least one character, then a ":" or "."
                                   somewhere before the first "/" that ends it
      2. REPOSITORY              - everything up to the optional "@" / ":"
                                   separator
      3. TAG / DIGEST (optional) - everything after the separator

    Only the registry is returned, without its trailing "/", or None when
    the URI has no registry part.

    Examples:

        image                                              -> None
        example/image                                      -> None
        example/image:tag                                  -> None
        example.domain.com/example/image:tag               -> example.domain.com
        123.123.123.123:123/example/image:tag              -> 123.123.123.123:123
        example.domain.com/example/image@sha256:45b23dee0  -> example.domain.com
    """
    matched = re.match(
        r"^(.+?(?:[:.].+?)\/)?(.*?)(?:[@:])?((?<=[@:]).*)?$", image_uri
    )
    registry = matched.group(1)
    if registry is None:
        return None
    return registry.rstrip("/")
def compute_resource_attributes(decos, compute_deco, resource_defaults):
    """
    Compute resource values taking into account defaults, the values specified
    in the compute decorator (like @batch or @kubernetes) directly, and
    resources specified via @resources decorator.

    Resolution rules:
    - Non-None entries of `resource_defaults` are the starting point.
    - If a decorator named "resources" is present, each of its attributes
      that is supported (known to the defaults or to the compute decorator)
      is merged with the compute decorator's value: when only one side is
      set that value is used, when both are set the larger one (compared as
      floats) wins. Values that cannot be compared as numbers must be equal
      on both sides.
    - Otherwise the compute decorator's non-None values override the defaults.

    Returns a dictionary of resource attr -> value (str).

    Raises MetaflowException when both decorators specify different
    non-numeric values for the same attribute.
    """
    assert compute_deco is not None
    supported_keys = set(resource_defaults.keys()) | set(
        compute_deco.attributes.keys()
    )

    # Use the value from resource_defaults by default (don't use None)
    result = {}
    for name, default in resource_defaults.items():
        if default is not None:
            result[name] = default

    # Only the first decorator named "resources" is taken into account.
    resources_deco = next((deco for deco in decos if deco.name == "resources"), None)

    if resources_deco is None:
        # If there is no resources decorator, values from compute_deco override
        # the defaults.
        for name in resource_defaults:
            if compute_deco.attributes.get(name) is not None:
                result[name] = str(compute_deco.attributes[name] or "0")
        return result

    for k, v in resources_deco.attributes.items():
        # Skip attributes that are not supported by the decorator.
        if k not in supported_keys:
            continue

        my_val = compute_deco.attributes.get(k)
        # We use the non None value if there is only one or the larger value
        # if they are both non None. Note this considers "" to be equivalent to
        # the value zero.
        if my_val is None and v is None:
            continue
        if my_val is None:
            result[k] = str(v or "0")
            continue
        if v is None:
            result[k] = str(my_val or "0")
            continue

        try:
            # Both sides are set: compare them as floats and keep the larger.
            result[k] = str(max(float(my_val or 0), float(v or 0)))
        except ValueError:
            # Here we don't have numbers, so we compare the value and raise
            # an exception if not equal
            if my_val != v:
                # TODO: Throw a better exception since the user has no
                # knowledge of 'compute' decorator
                raise MetaflowException(
                    "'resources' and compute decorator have conflicting "
                    "values for '%s'. Please use consistent values or "
                    "specify this resource constraint once" % k
                )
    return result
def sanitize_batch_tag(key, value):
    """
    Sanitize a key and value for use as a Batch tag.

    Characters outside the permitted set are removed, then the key is cut
    to 128 characters and the value to 256 characters.

    Returns the `(key, value)` pair after sanitizing.
    """
    # https://docs.aws.amazon.com/batch/latest/userguide/using-tags.html#tag-restrictions
    not_permitted = re.compile(r"[^A-Za-z0-9\s\+\-\=\.\_\:\/\@]")
    clean_key = not_permitted.sub("", key)
    clean_value = not_permitted.sub("", value)
    return clean_key[:128], clean_value[:256]
def validate_aws_tag(key: str, value: str):
    """
    Validate a tag key/value pair against the AWS tagging restrictions.

    Checks, in order:
    - neither key nor value starts with "aws:" (case-insensitive),
    - the key is at most 128 characters long,
    - the value is at most 256 characters long,
    - every character of the key and of the value is in the permitted set
      (letters, digits, whitespace and + - = . _ : / @).

    Empty keys and empty values are rejected, as before.

    Returns None when the tag is valid.

    Raises
    ------
    MetaflowException
        If any of the checks fails.
    """
    PERMITTED = r"[A-Za-z0-9\s\+\-\=\.\_\:\/\@]"
    AWS_PREFIX = r"^aws\:"  # case-insensitive.
    if re.match(AWS_PREFIX, key, re.IGNORECASE) or re.match(
        AWS_PREFIX, value, re.IGNORECASE
    ):
        raise MetaflowException(
            "'aws:' is not an allowed prefix for either tag keys or values"
        )
    if len(key) > 128:
        raise MetaflowException(
            "Tag key *%s* is too long. Maximum allowed tag key length is 128." % key
        )
    if len(value) > 256:
        raise MetaflowException(
            "Tag value *%s* is too long. Maximum allowed tag value length is 256."
            % value
        )
    # PERMITTED describes a single character, so the whole string has to be
    # matched against a repetition of it; re.match() alone would only look
    # at the first character.
    all_permitted = re.compile(PERMITTED + "+")
    if not all_permitted.fullmatch(key):
        raise MetaflowException(
            "Key *%s* is not permitted. Tags must match pattern: %s" % (key, PERMITTED)
        )
    if not all_permitted.fullmatch(value):
        raise MetaflowException(
            "Value *%s* is not permitted. Tags must match pattern: %s"
            % (value, PERMITTED)
        )
================================================
FILE: metaflow/plugins/aws/batch/__init__.py
================================================
================================================
FILE: metaflow/plugins/aws/batch/batch.py
================================================
import atexit
import copy
import json
import os
import select
import shlex
import time
from metaflow import util
from metaflow.plugins.datatools.s3.s3tail import S3Tail
from metaflow.plugins.aws.aws_utils import sanitize_batch_tag
from metaflow.exception import MetaflowException
from metaflow.metaflow_config import (
OTEL_ENDPOINT,
SERVICE_INTERNAL_URL,
DATATOOLS_S3ROOT,
DATASTORE_SYSROOT_S3,
DEFAULT_METADATA,
SERVICE_HEADERS,
BATCH_EMIT_TAGS,
CARD_S3ROOT,
S3_ENDPOINT_URL,
DEFAULT_SECRETS_BACKEND_TYPE,
AWS_SECRETS_MANAGER_DEFAULT_REGION,
S3_SERVER_SIDE_ENCRYPTION,
)
from metaflow.metaflow_config_funcs import config_values
from metaflow.mflog import (
export_mflog_env_vars,
bash_capture_logs,
tail_logs,
BASH_SAVE_LOGS,
)
from .batch_client import BatchClient
# Redirect structured logs to $PWD/.logs/
# "$PWD" is left unexpanded here: LOGS_DIR is interpolated into the bash
# command assembled in Batch._command (`mkdir -p ...`), so it is resolved by
# the shell that runs that command.
LOGS_DIR = "$PWD/.logs"
# File names of the captured stdout / stderr logs inside LOGS_DIR.
STDOUT_FILE = "mflog_stdout"
STDERR_FILE = "mflog_stderr"
# Full log paths; passed to export_mflog_env_vars in Batch._command.
STDOUT_PATH = os.path.join(LOGS_DIR, STDOUT_FILE)
STDERR_PATH = os.path.join(LOGS_DIR, STDERR_FILE)
# Exception type for AWS Batch failures; raised by Batch.launch_job when no
# usable job queue is available.
class BatchException(MetaflowException):
# Label associated with this exception type.
headline = "AWS Batch error"
# Exception type for a killed AWS Batch task.
# NOTE(review): presumably raised when a running task is terminated; the
# raise site is not in this part of the file -- confirm.
class BatchKilledException(MetaflowException):
# Label associated with this exception type.
headline = "AWS Batch task killed"
class Batch(object):
def __init__(self, metadata, environment, flow_datastore=None):
    """
    Keep references to the metadata provider, the environment and the
    (optional) flow datastore, create the Batch client, and make sure a
    launched job is killed when the interpreter exits.
    """
    self.metadata = metadata
    self.environment = environment
    self.flow_datastore = flow_datastore
    self._client = BatchClient()

    def _kill_job_at_exit():
        # `self.job` only exists once launch_job() has been called.
        if hasattr(self, "job"):
            return self.job.kill()
        return None

    atexit.register(_kill_job_at_exit)
def _command(
    self,
    environment,
    code_package_metadata,
    code_package_url,
    step_name,
    step_cmds,
    task_spec,
    offload_command_to_s3,
):
    """
    Build the container command (an argv list) for a task.

    Parameters
    ----------
    environment
        Provides the package / bootstrap commands for the task.
    code_package_metadata, code_package_url
        Identify the code package to fetch inside the container.
    step_name : str
        Name of the step being executed.
    step_cmds : list of str
        Shell commands that run the step itself.
    task_spec : dict
        Keyword arguments forwarded to `export_mflog_env_vars`.
    offload_command_to_s3 : bool
        When true, the full command is stored through the flow datastore
        and the returned command merely downloads and runs it.

    Returns
    -------
    list of str
        The `bash -c ...` argv. If offloading was requested but failed,
        the inline command is returned instead.

    Raises
    ------
    MetaflowException
        If offloading is requested without a flow datastore.
    """
    mflog_expr = export_mflog_env_vars(
        datastore_type="s3",
        stdout_path=STDOUT_PATH,
        stderr_path=STDERR_PATH,
        **task_spec
    )
    init_cmds = environment.get_package_commands(
        code_package_url, "s3", code_package_metadata
    )
    init_expr = " && ".join(init_cmds)
    step_expr = bash_capture_logs(
        " && ".join(environment.bootstrap_commands(step_name, "s3") + step_cmds)
    )

    # construct an entry point that
    # 1) initializes the mflog environment (mflog_expr)
    # 2) bootstraps a metaflow environment (init_expr)
    # 3) executes a task (step_expr)

    # the `true` command is to make sure that the generated command
    # plays well with docker containers which have entrypoint set as
    # eval $@
    cmd_str = "true && mkdir -p %s && %s && %s && %s; " % (
        LOGS_DIR,
        mflog_expr,
        init_expr,
        step_expr,
    )
    # after the task has finished, we save its exit code (fail/success)
    # and persist the final logs. The whole entrypoint should exit
    # with the exit code (c) of the task.
    #
    # Note that if step_expr OOMs, this tail expression is never executed.
    # We lose the last logs in this scenario (although they are visible
    # still through AWS CloudWatch console).
    cmd_str += "c=$?; %s; exit $c" % BASH_SAVE_LOGS
    command = shlex.split('bash -c "%s"' % cmd_str)

    if not offload_command_to_s3:
        return command

    # If S3 upload is enabled, we need to modify the command after it's created
    if self.flow_datastore is None:
        raise MetaflowException(
            "Can not offload Batch command to S3 without a datastore configured."
        )

    from metaflow.plugins.aws.aws_utils import parse_s3_full_path

    # Upload the command to S3 during deployment
    try:
        # IMPORTANT: Save the shlex-processed command (command[-1]), NOT the raw cmd_str.
        # The escaping in _get_download_code_package_cmd uses \" which is designed to be
        # processed by shlex.split('bash -c "%s"' % cmd_str). When we save to a file and
        # execute with 'bash /tmp/step_command.sh', there's no shlex processing, so we
        # must save the already-processed command where \" has been converted to ".
        # This is the bash -c argument after shlex processing
        processed_cmd = command[-1]
        command_bytes = processed_cmd.encode("utf-8")
        result_paths = self.flow_datastore.save_data([command_bytes], len_hint=1)
        s3_path, _key = result_paths[0]

        bucket, s3_object = parse_s3_full_path(s3_path)
        # NOTE: the script quoting is extremely sensitive due to the way shlex.split operates
        # and this being inserted into a quoted command elsewhere. Use escaped quotes.
        download_script = "{python} -c '{script}'".format(
            python=self.environment._python(),
            script='import boto3, os; ep=os.getenv(\\"METAFLOW_S3_ENDPOINT_URL\\"); boto3.client(\\"s3\\", **({\\"endpoint_url\\":ep} if ep else {})).download_file(\\"%s\\", \\"%s\\", \\"/tmp/step_command.sh\\")'
            % (bucket, s3_object),
        )
        download_cmd = (
            f"{self.environment._get_install_dependencies_cmd('s3')} && "  # required for boto3 due to the original dependencies cmd getting packaged, and not being downloaded in time.
            f"{download_script} && "
            f"chmod +x /tmp/step_command.sh && "
            f"bash /tmp/step_command.sh"
        )
        new_cmd = shlex.split('bash -c "%s"' % download_cmd)
        return new_cmd
    except Exception as e:
        print(f"Warning: Failed to upload command to S3: {e}")
        print("Falling back to inline command")
        # Actually fall back: without this return the method would hand
        # None to the caller instead of the inline command announced above.
        return command
def _search_jobs(self, flow_name, run_id, user):
if user is None:
regex = "-{flow_name}-".format(flow_name=flow_name)
else:
regex = "{user}-{flow_name}-".format(user=user, flow_name=flow_name)
jobs = []
for job in self._client.unfinished_jobs():
if regex in job["jobName"]:
jobs.append(job["jobId"])
if run_id is not None:
run_id = run_id[run_id.startswith("sfn-") and len("sfn-") :]
for job in self._client.describe_jobs(jobs):
parameters = job["parameters"]
match = (
(user is None or parameters["metaflow.user"] == user)
and (parameters["metaflow.flow_name"] == flow_name)
and (run_id is None or parameters["metaflow.run_id"] == run_id)
)
if match:
yield job
def _job_name(self, user, flow_name, run_id, step_name, task_id, retry_count):
return "{user}-{flow_name}-{run_id}-{step_name}-{task_id}-{retry_count}".format(
user=user,
flow_name=flow_name,
run_id=str(run_id) if run_id is not None else "",
step_name=step_name,
task_id=str(task_id) if task_id is not None else "",
retry_count=str(retry_count) if retry_count is not None else "",
)
def list_jobs(self, flow_name, run_id, user, echo):
    """
    Report every matching unfinished AWS Batch job through `echo`, one line
    per job, or a single notice when nothing matches.
    """
    reported = 0
    for job in self._search_jobs(flow_name, run_id, user):
        reported += 1
        line = "{name} [{id}] ({status})".format(
            name=job["jobName"], id=job["jobId"], status=job["status"]
        )
        echo(line)
    if reported == 0:
        echo("No running AWS Batch jobs found.")
def kill_jobs(self, flow_name, run_id, user, echo):
    """
    Kill every matching unfinished AWS Batch job and report the outcome of
    each attempt through `echo`. A failure to kill one job is reported and
    does not stop the remaining jobs from being processed. A single notice
    is echoed when nothing matches.
    """
    attempted = 0
    for job in self._search_jobs(flow_name, run_id, user):
        attempted += 1
        try:
            self._client.attach_job(job["jobId"]).kill()
            message = "Killing AWS Batch job: {name} [{id}] ({status})".format(
                name=job["jobName"],
                id=job["jobId"],
                status=job["status"],
            )
            echo(message)
        except Exception as e:
            echo(
                "Failed to terminate AWS Batch job %s [%s]" % (job["jobId"], repr(e))
            )
    if attempted == 0:
        echo("No running AWS Batch jobs found.")
def create_job(
self,
step_name,
step_cli,
task_spec,
code_package_metadata,
code_package_sha,
code_package_url,
code_package_ds,
image,
queue,
iam_role=None,
execution_role=None,
cpu=None,
gpu=None,
memory=None,
run_time_limit=None,
shared_memory=None,
max_swap=None,
swappiness=None,
inferentia=None,
efa=None,
env={},
attrs={},
host_volumes=None,
efs_volumes=None,
use_tmpfs=None,
aws_batch_tags=None,
tmpfs_tempdir=None,
tmpfs_size=None,
tmpfs_path=None,
num_parallel=0,
ephemeral_storage=None,
log_driver=None,
log_options=None,
offload_command_to_s3=False,
privileged=False,
):
"""
Assemble (but do not submit) an AWS Batch job for one task.

The job is built through the client's job builder: name, queue, command,
image, roles, resources and job definition first, followed by the
environment variables, parameters and tags.

Parameters
----------
step_name : str
    Name of the step to run.
step_cli : str
    Command line that executes the step; becomes the single step command
    of the container command.
task_spec : dict
    Forwarded to `_command`.
code_package_metadata, code_package_sha, code_package_url, code_package_ds
    Code package details; exported to the job as METAFLOW_CODE_* variables.
image, queue, iam_role, execution_role, cpu, gpu, memory, run_time_limit,
shared_memory, max_swap, swappiness, inferentia, efa, host_volumes,
efs_volumes, use_tmpfs, tmpfs_tempdir, tmpfs_size, tmpfs_path, num_parallel,
ephemeral_storage, log_driver, log_options, privileged
    Passed to the corresponding job builder / job definition settings.
env : dict
    Additional environment variables; set after the built-in ones.
attrs : dict
    "metaflow.*" attributes. Used for the job name, the task id, the job
    parameters and (when tag emission is enabled) the tags.
    "metaflow.user" is required.
aws_batch_tags : dict, optional
    Extra tags to attach to the job.
offload_command_to_s3 : bool
    Forwarded to `_command`.

Returns
-------
The configured job builder object.
"""
# NOTE(review): `env` and `attrs` use mutable default arguments; they are
# only read in this method.
job_name = self._job_name(
attrs.get("metaflow.user"),
attrs.get("metaflow.flow_name"),
attrs.get("metaflow.run_id"),
attrs.get("metaflow.step_name"),
attrs.get("metaflow.task_id"),
attrs.get("metaflow.retry_count"),
)
job = (
self._client.job()
.job_name(job_name)
.job_queue(queue)
.command(
self._command(
self.environment,
code_package_metadata,
code_package_url,
step_name,
[step_cli],
task_spec,
offload_command_to_s3,
)
)
.image(image)
.iam_role(iam_role)
.execution_role(execution_role)
.cpu(cpu)
.gpu(gpu)
.memory(memory)
.shared_memory(shared_memory)
.max_swap(max_swap)
.swappiness(swappiness)
.inferentia(inferentia)
.efa(efa)
.timeout_in_secs(run_time_limit)
.job_def(
image,
iam_role,
queue,
execution_role,
shared_memory,
max_swap,
swappiness,
inferentia,
efa,
memory=memory,
host_volumes=host_volumes,
efs_volumes=efs_volumes,
use_tmpfs=use_tmpfs,
tmpfs_tempdir=tmpfs_tempdir,
tmpfs_size=tmpfs_size,
tmpfs_path=tmpfs_path,
num_parallel=num_parallel,
ephemeral_storage=ephemeral_storage,
log_driver=log_driver,
log_options=log_options,
privileged=privileged,
)
.task_id(attrs.get("metaflow.task_id"))
.environment_variable("AWS_DEFAULT_REGION", self._client.region())
.environment_variable("METAFLOW_CODE_METADATA", code_package_metadata)
.environment_variable("METAFLOW_CODE_SHA", code_package_sha)
.environment_variable("METAFLOW_CODE_URL", code_package_url)
.environment_variable("METAFLOW_CODE_DS", code_package_ds)
# Unlike the lookups above, this one raises KeyError when the attribute
# is missing.
.environment_variable("METAFLOW_USER", attrs["metaflow.user"])
.environment_variable("METAFLOW_SERVICE_URL", SERVICE_INTERNAL_URL)
.environment_variable(
"METAFLOW_SERVICE_HEADERS", json.dumps(SERVICE_HEADERS)
)
.environment_variable("METAFLOW_DATASTORE_SYSROOT_S3", DATASTORE_SYSROOT_S3)
.environment_variable("METAFLOW_DATATOOLS_S3ROOT", DATATOOLS_S3ROOT)
.environment_variable("METAFLOW_DEFAULT_DATASTORE", "s3")
.environment_variable("METAFLOW_DEFAULT_METADATA", DEFAULT_METADATA)
.environment_variable("METAFLOW_CARD_S3ROOT", CARD_S3ROOT)
.environment_variable("METAFLOW_OTEL_ENDPOINT", OTEL_ENDPOINT)
.environment_variable("METAFLOW_RUNTIME_ENVIRONMENT", "aws-batch")
)
# Temporary passing of *some* environment variables. Do not rely on this
# mechanism as it will be removed in the near future
for k, v in config_values():
if k.startswith("METAFLOW_CONDA_") or k.startswith("METAFLOW_DEBUG_"):
job.environment_variable(k, v)
if DEFAULT_SECRETS_BACKEND_TYPE is not None:
job.environment_variable(
"METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE", DEFAULT_SECRETS_BACKEND_TYPE
)
if AWS_SECRETS_MANAGER_DEFAULT_REGION is not None:
job.environment_variable(
"METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION",
AWS_SECRETS_MANAGER_DEFAULT_REGION,
)
# tmpfs counts as enabled when either `use_tmpfs` or `tmpfs_size` is truthy.
tmpfs_enabled = use_tmpfs or (tmpfs_size and not use_tmpfs)
if tmpfs_enabled and tmpfs_tempdir:
job.environment_variable("METAFLOW_TEMPDIR", tmpfs_path)
if S3_SERVER_SIDE_ENCRYPTION is not None:
job.environment_variable(
"METAFLOW_S3_SERVER_SIDE_ENCRYPTION", S3_SERVER_SIDE_ENCRYPTION
)
# Skip setting METAFLOW_DATASTORE_SYSROOT_LOCAL because metadata sync between the local user
# instance and the remote AWS Batch instance assumes metadata is stored in DATASTORE_LOCAL_DIR
# on the remote AWS Batch instance; this happens when METAFLOW_DATASTORE_SYSROOT_LOCAL
# is NOT set (see get_datastore_root_from_config in datastore/local.py).
# add METAFLOW_S3_ENDPOINT_URL
if S3_ENDPOINT_URL is not None:
job.environment_variable("METAFLOW_S3_ENDPOINT_URL", S3_ENDPOINT_URL)
# Caller-supplied variables are applied after the built-in ones.
for name, value in env.items():
job.environment_variable(name, value)
# Every attribute is also recorded as a job parameter; _search_jobs reads
# the "metaflow.*" parameters back when looking for jobs.
if attrs:
for key, value in attrs.items():
job.parameter(key, value)
# Tags for AWS Batch job (for say cost attribution)
if BATCH_EMIT_TAGS:
job.tag("app", "metaflow")
for key in [
"metaflow.flow_name",
"metaflow.run_id",
"metaflow.step_name",
"metaflow.run_id.$",
"metaflow.production_token",
]:
if key in attrs:
job.tag(key, attrs.get(key))
# As some values can be affected by users, sanitize them so they adhere to AWS tagging restrictions.
for key in [
"metaflow.version",
"metaflow.user",
"metaflow.owner",
]:
if key in attrs:
k, v = sanitize_batch_tag(key, attrs.get(key))
job.tag(k, v)
# Tags supplied by the caller are applied as given (not sanitized here).
if aws_batch_tags is not None:
for key, value in aws_batch_tags.items():
job.tag(key, value)
return job
def launch_job(
    self,
    step_name,
    step_cli,
    task_spec,
    code_package_metadata,
    code_package_sha,
    code_package_url,
    code_package_ds,
    image,
    queue,
    iam_role=None,
    execution_role=None,  # for FARGATE compatibility
    cpu=None,
    gpu=None,
    memory=None,
    run_time_limit=None,
    shared_memory=None,
    max_swap=None,
    swappiness=None,
    inferentia=None,
    efa=None,
    host_volumes=None,
    efs_volumes=None,
    use_tmpfs=None,
    aws_batch_tags=None,
    tmpfs_tempdir=None,
    tmpfs_size=None,
    tmpfs_path=None,
    num_parallel=0,
    env=None,
    attrs=None,
    ephemeral_storage=None,
    log_driver=None,
    log_options=None,
    privileged=None,
):
    """
    Build an AWS Batch job through `self.create_job` and submit it.

    When `queue` is None, the first queue yielded by
    `self._client.active_job_queues()` is used. The submitted job (the
    result of `job.execute()`) is stored on `self.job`, and `num_parallel`
    is stored on `self.num_parallel`.

    `env` and `attrs` default to a fresh empty dict on every call (they
    used to be shared mutable `{}` defaults).

    Raises
    ------
    BatchException
        If no queue was given and no active queue could be found.
    """
    # Never share one dict object between calls: anything downstream that
    # mutates it would otherwise leak state into later launches.
    if env is None:
        env = {}
    if attrs is None:
        attrs = {}

    if queue is None:
        queue = next(self._client.active_job_queues(), None)
    if queue is None:
        raise BatchException(
            "Unable to launch AWS Batch job. No job queue "
            "specified and no valid & enabled queue found."
        )
    job = self.create_job(
        step_name,
        step_cli,
        task_spec,
        code_package_metadata,
        code_package_sha,
        code_package_url,
        code_package_ds,
        image,
        queue,
        iam_role,
        execution_role,
        cpu,
        gpu,
        memory,
        run_time_limit,
        shared_memory,
        max_swap,
        swappiness,
        inferentia,
        efa,
        env=env,
        attrs=attrs,
        host_volumes=host_volumes,
        efs_volumes=efs_volumes,
        use_tmpfs=use_tmpfs,
        aws_batch_tags=aws_batch_tags,
        tmpfs_tempdir=tmpfs_tempdir,
        tmpfs_size=tmpfs_size,
        tmpfs_path=tmpfs_path,
        num_parallel=num_parallel,
        ephemeral_storage=ephemeral_storage,
        log_driver=log_driver,
        log_options=log_options,
        privileged=privileged,
    )
    self.num_parallel = num_parallel
    self.job = job.execute()
def wait(self, stdout_location, stderr_location, echo=None):
    """
    Block until `self.job` has finished, echoing progress and tailing logs.

    Parameters
    ----------
    stdout_location, stderr_location
        Locations handed to `S3Tail` for the task's stdout / stderr logs.
    echo : callable
        Called as `echo(message, stream_name, batch_id=...)`. It is invoked
        unconditionally, so a callable must be supplied despite the `None`
        default.

    Raises
    ------
    BatchException
        If the job crashed, or is still reported as running once log
        tailing has returned.
    """

    def wait_for_launch(job, child_jobs):
        status = job.status
        echo(
            "Task is starting (status %s)..." % status,
            "stderr",
            batch_id=job.id,
        )
        t = time.time()
        while True:
            # Re-announce on every status change, or every 30 seconds.
            if status != job.status or (time.time() - t) > 30:
                if not child_jobs:
                    child_statuses = ""
                else:
                    # Read each node's status exactly once and tally it.
                    # (The previous code took len() of a list of booleans,
                    # which reported the total node count for every status.)
                    status_counts = {}
                    for child_job in child_jobs:
                        child_status = child_job.status
                        status_counts[child_status] = (
                            status_counts.get(child_status, 0) + 1
                        )
                    child_statuses = " (parallel node status: [{}])".format(
                        ", ".join(
                            "{}:{}".format(name, num)
                            for (name, num) in sorted(status_counts.items())
                        )
                    )
                status = job.status
                echo(
                    "Task is starting (status %s)... %s" % (status, child_statuses),
                    "stderr",
                    batch_id=job.id,
                )
                t = time.time()
            if job.is_running or job.is_done or job.is_crashed:
                break
            select.poll().poll(200)

    prefix = b"[%s] " % util.to_bytes(self.job.id)
    stdout_tail = S3Tail(stdout_location)
    stderr_tail = S3Tail(stderr_location)

    # For multi-node jobs, derive handles for nodes 1..N-1 by suffixing the
    # main job id with "#<node index>".
    child_jobs = []
    if self.num_parallel > 1:
        for node in range(1, self.num_parallel):
            child_job = copy.copy(self.job)
            child_job._id = child_job._id + "#{}".format(node)
            child_jobs.append(child_job)

    # 1) Loop until the job has started
    wait_for_launch(self.job, child_jobs)

    # 2) Tail logs until the job has finished
    tail_logs(
        prefix=prefix,
        stdout_tail=stdout_tail,
        stderr_tail=stderr_tail,
        echo=echo,
        has_log_updates=lambda: self.job.is_running,
    )

    # In case of hard crashes (OOM), the final save_logs won't happen.
    # We can fetch the remaining logs from AWS CloudWatch and persist them
    # to Amazon S3.
    if self.job.is_crashed:
        # First non-None explanation wins; the literal is the fallback.
        msg = next(
            msg
            for msg in [
                self.job.reason,
                self.job.status_reason,
                "Task crashed.",
            ]
            if msg is not None
        )
        raise BatchException(
            "%s " "This could be a transient error. " "Use @retry to retry." % msg
        )
    else:
        if self.job.is_running:
            # Kill the job if it is still running by throwing an exception.
            raise BatchException("Task failed!")
        echo(
            "Task finished with exit code %s." % self.job.status_code,
            "stderr",
            batch_id=self.job.id,
        )
================================================
FILE: metaflow/plugins/aws/batch/batch_cli.py
================================================
from metaflow._vendor import click
import os
import sys
import time
import traceback
from metaflow import util
from metaflow import R
from metaflow.exception import CommandException, METAFLOW_EXIT_DISALLOW_RETRY
from metaflow.metadata_provider.util import sync_local_metadata_from_datastore
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
from metaflow.mflog import TASK_LOG_SOURCE
from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
from .batch import Batch, BatchKilledException
from ..aws_utils import validate_aws_tag
# Root click group of this module; the `batch` command group is attached to it.
# Deliberately documented with comments rather than a docstring: no `help=`
# is passed here, so a docstring would become the group's help text.
@click.group()
def cli():
    pass
# `batch` sub-group; the `list`, `kill` and `step` commands below register
# themselves on it.
@cli.group(help="Commands related to AWS Batch.")
def batch():
    pass
def _execute_cmd(func, flow_name, run_id, user, my_runs, echo):
if user and my_runs:
raise CommandException("--user and --my-runs are mutually exclusive.")
if run_id and my_runs:
raise CommandException("--run_id and --my-runs are mutually exclusive.")
if my_runs:
user = util.get_username()
latest_run = True
if user and not run_id:
latest_run = False
if not run_id and latest_run:
run_id = util.get_latest_run_id(echo, flow_name)
if run_id is None:
raise CommandException("A previous run id was not found. Specify --run-id.")
func(flow_name, run_id, user, echo)
# `batch list`: resolves the run/user selection via _execute_cmd and hands it
# to Batch.list_jobs.
@batch.command(
    "list",
    help="\b\nList unfinished AWS Batch tasks of this flow.\n"
    "By default, consider the latest run only.",
)
@click.option(
    "--my-runs",
    default=False,
    is_flag=True,
    help="List my unfinished tasks, across all runs.",
)
@click.option(
    "--user",
    default=None,
    help="List unfinished tasks for the given user, across all runs.",
)
@click.option(
    "--run-id",
    default=None,
    help="List unfinished tasks corresponding to the run id.",
)
@click.pass_context
def _list(ctx, run_id, user, my_runs):
    state = ctx.obj
    lister = Batch(state.metadata, state.environment).list_jobs
    _execute_cmd(lister, state.flow.name, run_id, user, my_runs, state.echo)
# `batch kill`: resolves the run/user selection via _execute_cmd and hands it
# to Batch.kill_jobs.
@batch.command(
    help="\b\nTerminate unfinished AWS Batch tasks of this flow.\n"
    "By default, consider the latest run only.",
)
@click.option(
    "--my-runs",
    default=False,
    is_flag=True,
    help="Kill my unfinished tasks, across all runs.",
)
@click.option(
    "--user",
    default=None,
    help="Terminate unfinished tasks for the given user, across all runs.",
)
@click.option(
    "--run-id",
    default=None,
    help="Terminate unfinished tasks corresponding to the run id.",
)
@click.pass_context
def kill(ctx, run_id, user, my_runs):
    state = ctx.obj
    killer = Batch(state.metadata, state.environment).kill_jobs
    _execute_cmd(killer, state.flow.name, run_id, user, my_runs, state.echo)
# `batch step`: composes the in-container `step` command line, launches it as
# an AWS Batch job, tails its logs and syncs local metadata afterwards.
@batch.command(
    help="Execute a single task using AWS Batch. This command calls the "
    "top-level step command inside a AWS Batch job with the given options. "
    "Typically you do not call this command directly; it is used internally by "
    "Metaflow."
)
@click.argument("step-name")
@click.argument("code-package-metadata")
@click.argument("code-package-sha")
@click.argument("code-package-url")
@click.option("--executable", help="Executable requirement for AWS Batch.")
@click.option(
    "--image",
    help="Docker image requirement for AWS Batch. In name:version format.",
)
@click.option("--iam-role", help="IAM role requirement for AWS Batch.")
@click.option(
    "--execution-role",
    help="Execution role requirement for AWS Batch on Fargate.",
)
@click.option("--cpu", help="CPU requirement for AWS Batch.")
@click.option("--gpu", help="GPU requirement for AWS Batch.")
@click.option("--memory", help="Memory requirement for AWS Batch.")
@click.option("--queue", help="Job execution queue for AWS Batch.")
@click.option("--run-id", help="Passed to the top-level 'step'.")
@click.option("--task-id", help="Passed to the top-level 'step'.")
@click.option("--input-paths", help="Passed to the top-level 'step'.")
@click.option("--split-index", help="Passed to the top-level 'step'.")
@click.option("--clone-path", help="Passed to the top-level 'step'.")
@click.option("--clone-run-id", help="Passed to the top-level 'step'.")
@click.option(
    "--tag", multiple=True, default=None, help="Passed to the top-level 'step'."
)
@click.option("--namespace", default=None, help="Passed to the top-level 'step'.")
@click.option("--retry-count", default=0, help="Passed to the top-level 'step'.")
@click.option(
    "--max-user-code-retries", default=0, help="Passed to the top-level 'step'."
)
@click.option(
    "--run-time-limit",
    default=5 * 24 * 60 * 60,
    help="Run time limit in seconds for the AWS Batch job. Default is 5 days.",
)
@click.option("--shared-memory", help="Shared Memory requirement for AWS Batch.")
@click.option("--max-swap", help="Max Swap requirement for AWS Batch.")
@click.option("--swappiness", help="Swappiness requirement for AWS Batch.")
@click.option("--inferentia", help="Inferentia requirement for AWS Batch.")
@click.option(
    "--efa",
    default=0,
    type=int,
    help="Activate designated number of elastic fabric adapter devices. "
    "EFA driver must be installed and instance type compatible with EFA",
)
@click.option(
    "--aws-batch-tag",
    "aws_batch_tags",
    multiple=True,
    default=None,
    help="AWS tags. Format: key=value, multiple allowed",
)
@click.option("--use-tmpfs", is_flag=True, help="tmpfs requirement for AWS Batch.")
@click.option("--tmpfs-tempdir", is_flag=True, help="tmpfs requirement for AWS Batch.")
@click.option("--tmpfs-size", help="tmpfs requirement for AWS Batch.")
@click.option("--tmpfs-path", help="tmpfs requirement for AWS Batch.")
# NOTE: ubf-context is not explicitly used, but @parallel decorator tries to pass this so keep it for now
@click.option(
    "--ubf-context", default=None, type=click.Choice(["none", UBF_CONTROL, UBF_TASK])
)
@click.option("--host-volumes", multiple=True)
@click.option("--efs-volumes", multiple=True)
@click.option(
    "--ephemeral-storage",
    default=None,
    type=int,
    help="Ephemeral storage (for AWS Batch only)",
)
@click.option(
    "--log-driver",
    default=None,
    type=str,
    help="Log driver for AWS ECS container",
)
@click.option(
    "--log-options",
    default=None,
    type=str,
    multiple=True,
    help="Log options for the chosen log driver",
)
@click.option(
    "--num-parallel",
    default=0,
    type=int,
    help="Number of parallel nodes to run as a multi-node job.",
)
@click.option("--privileged", is_flag=True, help="Run the AWS Batch Job as privileged")
@click.pass_context
def step(
    ctx,
    step_name,
    code_package_metadata,
    code_package_sha,
    code_package_url,
    executable=None,
    image=None,
    iam_role=None,
    execution_role=None,
    cpu=None,
    gpu=None,
    memory=None,
    queue=None,
    run_time_limit=None,
    shared_memory=None,
    max_swap=None,
    swappiness=None,
    inferentia=None,
    efa=None,
    aws_batch_tags=None,
    use_tmpfs=None,
    tmpfs_tempdir=None,
    tmpfs_size=None,
    tmpfs_path=None,
    host_volumes=None,
    efs_volumes=None,
    ephemeral_storage=None,
    log_driver=None,
    log_options=None,
    num_parallel=None,
    privileged=None,
    **kwargs
):
    def echo(msg, stream="stderr", batch_id=None, **kwargs):
        # Prefix every message with the Batch job id when one is known.
        msg = util.to_unicode(msg)
        if batch_id:
            msg = "[%s] %s" % (batch_id, msg)
        # NOTE(review): `stream` is passed around as a string ("stderr"), so
        # comparing it with the sys.stderr object is always False and
        # everything is echoed with err=False. Left unchanged because fixing
        # it moves output to a different stream - confirm before changing.
        ctx.obj.echo_always(msg, err=(stream == sys.stderr), **kwargs)

    if R.use_r():
        entrypoint = R.entrypoint()
    else:
        executable = ctx.obj.environment.executable(step_name, executable)
        entrypoint = "%s -u %s" % (executable, os.path.basename(sys.argv[0]))

    top_args = " ".join(util.dict_to_cli_options(ctx.parent.parent.params))

    # Long input-path strings are cut into chunks of 30 * 1024 characters.
    # Each chunk is shipped as a METAFLOW_INPUT_PATHS_<n> environment variable
    # and the CLI argument becomes a concatenation of ${...} references.
    input_paths = kwargs.get("input_paths")
    split_vars = None
    if input_paths:
        max_size = 30 * 1024
        split_vars = {
            "METAFLOW_INPUT_PATHS_%d" % (i // max_size): input_paths[i : i + max_size]
            for i in range(0, len(input_paths), max_size)
        }
        kwargs["input_paths"] = "".join("${%s}" % s for s in split_vars.keys())

    step_args = " ".join(util.dict_to_cli_options(kwargs))
    num_parallel = num_parallel or 0
    if num_parallel and num_parallel > 1:
        # For multinode, we need to add a placeholder that can be mutated by the caller
        step_args += " [multinode-args]"
    step_cli = "{entrypoint} {top_args} step {step} {step_args}".format(
        entrypoint=entrypoint,
        top_args=top_args,
        step=step_name,
        step_args=step_args,
    )
    node = ctx.obj.graph[step_name]

    # Get retry information
    retry_count = kwargs.get("retry_count", 0)
    retry_deco = [deco for deco in node.decorators if deco.name == "retry"]
    minutes_between_retries = None
    if retry_deco:
        minutes_between_retries = int(
            retry_deco[0].attributes.get("minutes_between_retries", 1)
        )

    # Set batch attributes
    task_spec = {
        "flow_name": ctx.obj.flow.name,
        "step_name": step_name,
        "run_id": kwargs["run_id"],
        "task_id": kwargs["task_id"],
        "retry_count": str(retry_count),
    }
    attrs = {"metaflow.%s" % k: v for k, v in task_spec.items()}
    attrs["metaflow.user"] = util.get_username()
    attrs["metaflow.version"] = ctx.obj.environment.get_environment_info()[
        "metaflow_version"
    ]

    env = {"METAFLOW_FLOW_FILENAME": os.path.basename(sys.argv[0])}

    # Always bind batch_tags so the launch_job call below cannot hit an
    # unbound local when no tags were supplied.
    batch_tags = None
    if aws_batch_tags is not None:
        # We do not need to validate these again,
        # as they come supplied by the batch decorator which already performed validation.
        batch_tags = {}
        for item in list(aws_batch_tags):
            # Split on the first "=" only: "=" is a legal character inside
            # an AWS tag value.
            key, value = item.split("=", 1)
            batch_tags[key] = value

    env_deco = [deco for deco in node.decorators if deco.name == "environment"]
    if env_deco:
        env.update(env_deco[0].attributes["vars"])

    # Add the environment variables related to the input-paths argument
    if split_vars:
        env.update(split_vars)

    # Without a @retry decorator there is no configured delay; skip the sleep
    # instead of failing on "%d" % None.
    if retry_count and minutes_between_retries is not None:
        ctx.obj.echo_always(
            "Sleeping %d minutes before the next AWS Batch retry"
            % minutes_between_retries
        )
        time.sleep(minutes_between_retries * 60)

    # this information is needed for log tailing
    ds = ctx.obj.flow_datastore.get_task_datastore(
        mode="w",
        run_id=kwargs["run_id"],
        step_name=step_name,
        task_id=kwargs["task_id"],
        attempt=int(retry_count),
    )
    stdout_location = ds.get_log_location(TASK_LOG_SOURCE, "stdout")
    stderr_location = ds.get_log_location(TASK_LOG_SOURCE, "stderr")

    def _sync_metadata():
        # Only the "local" metadata provider needs a sync from the datastore.
        if ctx.obj.metadata.TYPE == "local":
            sync_local_metadata_from_datastore(
                DATASTORE_LOCAL_DIR,
                ctx.obj.flow_datastore.get_task_datastore(
                    kwargs["run_id"], step_name, kwargs["task_id"]
                ),
            )

    batch = Batch(ctx.obj.metadata, ctx.obj.environment)
    try:
        with ctx.obj.monitor.measure("metaflow.aws.batch.launch_job"):
            batch.launch_job(
                step_name,
                step_cli,
                task_spec,
                code_package_metadata,
                code_package_sha,
                code_package_url,
                ctx.obj.flow_datastore.TYPE,
                image=image,
                queue=queue,
                iam_role=iam_role,
                execution_role=execution_role,
                cpu=cpu,
                gpu=gpu,
                memory=memory,
                run_time_limit=run_time_limit,
                shared_memory=shared_memory,
                max_swap=max_swap,
                swappiness=swappiness,
                inferentia=inferentia,
                efa=efa,
                env=env,
                attrs=attrs,
                host_volumes=host_volumes,
                efs_volumes=efs_volumes,
                use_tmpfs=use_tmpfs,
                aws_batch_tags=batch_tags,
                tmpfs_tempdir=tmpfs_tempdir,
                tmpfs_size=tmpfs_size,
                tmpfs_path=tmpfs_path,
                ephemeral_storage=ephemeral_storage,
                log_driver=log_driver,
                log_options=log_options,
                num_parallel=num_parallel,
                privileged=privileged,
            )
    except Exception:
        # A failed launch is reported and exits with the no-retry code.
        traceback.print_exc()
        _sync_metadata()
        sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
    try:
        batch.wait(stdout_location, stderr_location, echo=echo)
    except BatchKilledException:
        # don't retry killed tasks
        traceback.print_exc()
        sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
    finally:
        _sync_metadata()
================================================
FILE: metaflow/plugins/aws/batch/batch_client.py
================================================
# -*- coding: utf-8 -*-
from collections import defaultdict
import copy
import random
import time
import hashlib
# Python 2/3 compatibility shim: when the `unicode` builtin is missing, alias
# both `unicode` and `basestring` to `str` so the isinstance() checks below
# work.
try:
    unicode
except NameError:
    unicode = str
    basestring = str
from metaflow.exception import MetaflowException
from metaflow.metaflow_config import AWS_SANDBOX_ENABLED
class BatchClient(object):
    """Thin wrapper around the AWS client obtained via `get_aws_client("batch")`."""

    def __init__(self):
        from ..aws_client import get_aws_client

        self._client = get_aws_client("batch")

    def active_job_queues(self):
        """Lazily yield the names of queues whose state is ENABLED and status is VALID."""
        paginator = self._client.get_paginator("describe_job_queues")
        return (
            queue["jobQueueName"]
            for page in paginator.paginate()
            for queue in page["jobQueues"]
            if queue["state"] == "ENABLED" and queue["status"] == "VALID"
        )

    def unfinished_jobs(self):
        """Lazily yield job summaries in any non-terminal status, across all active queues."""
        queues = self.active_job_queues()
        return (
            job
            for queue in queues
            for status in ["SUBMITTED", "PENDING", "RUNNABLE", "STARTING", "RUNNING"]
            for page in self._client.get_paginator("list_jobs").paginate(
                jobQueue=queue, jobStatus=status
            )
            for job in page["jobSummaryList"]
        )

    def describe_jobs(self, job_ids):
        """Yield one job description per id, querying in chunks of 100 ids."""
        for chunk in [job_ids[i : i + 100] for i in range(0, len(job_ids), 100)]:
            for job in self._client.describe_jobs(jobs=chunk)["jobs"]:
                yield job

    def describe_job_queue(self, job_queue):
        """
        Return the description of the job queue named `job_queue`.

        Raises BatchJobException when no such queue is returned.
        """
        # paginate() already returns the page iterator; the previous code
        # called .paginate() on it again and subscripted the result, which
        # could never succeed.
        pages = self._client.get_paginator("describe_job_queues").paginate(
            jobQueues=[job_queue], maxResults=1
        )
        for page in pages:
            for queue in page["jobQueues"]:
                return queue
        raise BatchJobException("AWS Batch Job Queue %s not found." % job_queue)

    def job(self):
        """Return a new, empty BatchJob builder bound to this client."""
        return BatchJob(self._client)

    def attach_job(self, job_id):
        """Return a RunningJob for an existing job id, after calling its update()."""
        job = RunningJob(job_id, self._client)
        return job.update()

    def region(self):
        """Return the region name from the underlying client's configuration."""
        return self._client._client_config.region_name
class BatchJobException(MetaflowException):
    """Raised in this module for invalid job parameters and failed job-definition setup."""

    headline = "AWS Batch job error"
class BatchJob(object):
def __init__(self, client):
self._client = client
tree = lambda: defaultdict(tree)
self.payload = tree()
def execute(self):
    """
    Submit the accumulated payload with `submit_job` and return the
    resulting RunningJob after calling its update().

    For multi-node jobs (`num_parallel >= 1`) the single `containerOverrides`
    entry is replaced by per-node `nodeOverrides`: node 0 runs the command
    with `--split-index 0`, the other nodes run a variant whose task id,
    ubf context and split index are rewritten per node.

    Raises BatchJobException when no image or IAM role has been set.
    """
    if self._image is None:
        raise BatchJobException(
            "Unable to launch AWS Batch job. No docker image specified."
        )
    if self._iam_role is None:
        raise BatchJobException(
            "Unable to launch AWS Batch job. No IAM role specified."
        )

    # Multinode
    if getattr(self, "num_parallel", 0) >= 1:
        num_nodes = self.num_parallel
        # We need this task-id set so that all the nodes are aware of the control
        # task's task-id. These "MF_" variables populate the `current.parallel` namedtuple
        self.environment_variable("MF_PARALLEL_CONTROL_TASK_ID", self._task_id)

        container = self.payload["containerOverrides"]
        main_override = copy.deepcopy(container)
        worker_override = copy.deepcopy(container)
        template = container["command"][-1]

        # main node: it is also an ubf_task, so it gets split-index 0
        main_override["command"][-1] = template.replace(
            "[multinode-args]", "--split-index 0"
        )

        # other nodes: no "control-" prefix, node index appended to the task id
        worker_task_id = (
            self._task_id.replace("control-", "") + "-node-$AWS_BATCH_JOB_NODE_INDEX"
        )
        worker_override["command"][-1] = (
            template.replace(self._task_id, worker_task_id)
            .replace("ubf_control", "ubf_task")
            .replace("[multinode-args]", "--split-index $AWS_BATCH_JOB_NODE_INDEX")
        )

        node_overrides = [
            {"targetNodes": "0:0", "containerOverrides": main_override}
        ]
        if num_nodes > 1:
            node_overrides.append(
                {
                    "targetNodes": "1:{}".format(num_nodes - 1),
                    "containerOverrides": worker_override,
                }
            )
        self.payload["nodeOverrides"] = {"nodePropertyOverrides": node_overrides}
        del self.payload["containerOverrides"]

    submitted = self._client.submit_job(**self.payload)
    return RunningJob(submitted["jobId"], self._client).update()
def _register_job_definition(
self,
image,
job_role,
job_queue,
execution_role,
shared_memory,
max_swap,
swappiness,
inferentia,
efa,
memory,
host_volumes,
efs_volumes,
use_tmpfs,
tmpfs_tempdir,
tmpfs_size,
tmpfs_path,
num_parallel,
ephemeral_storage,
log_driver,
log_options,
privileged,
):
# identify platform from any compute environment associated with the
# queue
if AWS_SANDBOX_ENABLED:
# within the Metaflow sandbox, we can't execute the
# describe_job_queues directive for AWS Batch to detect compute
# environment platform, so let's just default to EC2 for now.
platform = "EC2"
else:
response = self._client.describe_job_queues(jobQueues=[job_queue])
if len(response["jobQueues"]) == 0:
raise BatchJobException("AWS Batch Job Queue %s not found." % job_queue)
compute_environment = response["jobQueues"][0]["computeEnvironmentOrder"][
0
]["computeEnvironment"]
response = self._client.describe_compute_environments(
computeEnvironments=[compute_environment]
)
platform = response["computeEnvironments"][0]["computeResources"]["type"]
# compose job definition
job_definition = {
"type": "container",
"containerProperties": {
"image": image,
"jobRoleArn": job_role,
"command": ["echo", "hello world"],
"resourceRequirements": [
{"value": "1", "type": "VCPU"},
{"value": "4096", "type": "MEMORY"},
],
},
# This propagates the AWS Batch resource tags to the underlying
# ECS tasks.
"propagateTags": True,
}
if privileged:
job_definition["containerProperties"]["privileged"] = privileged
log_options_dict = {}
if log_options:
if isinstance(log_options, str):
log_options = [log_options]
for each_log_option in log_options:
k, v = each_log_option.split(":", 1)
log_options_dict[k] = v
if log_driver or log_options:
job_definition["containerProperties"]["logConfiguration"] = {}
if log_driver:
job_definition["containerProperties"]["logConfiguration"][
"logDriver"
] = log_driver
if log_options:
job_definition["containerProperties"]["logConfiguration"][
"options"
] = log_options_dict
if platform == "FARGATE" or platform == "FARGATE_SPOT":
if num_parallel > 1:
raise BatchJobException("Fargate does not support multinode jobs.")
if execution_role is None:
raise BatchJobException(
"No AWS Fargate task execution IAM role found. Please see "
"https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html "
"and set the role as METAFLOW_ECS_FARGATE_EXECUTION_ROLE "
"environment variable."
)
job_definition["containerProperties"]["executionRoleArn"] = execution_role
job_definition["platformCapabilities"] = ["FARGATE"]
job_definition["containerProperties"]["networkConfiguration"] = {
"assignPublicIp": "ENABLED"
}
if ephemeral_storage:
job_definition["containerProperties"]["ephemeralStorage"] = {
"sizeInGiB": ephemeral_storage
}
if platform == "EC2" or platform == "SPOT":
if "linuxParameters" not in job_definition["containerProperties"]:
job_definition["containerProperties"]["linuxParameters"] = {}
if shared_memory is not None:
if not (
isinstance(shared_memory, (int, unicode, basestring))
and int(float(shared_memory)) > 0
):
raise BatchJobException(
"Invalid shared memory size value ({}); "
"it should be greater than 0".format(shared_memory)
)
else:
job_definition["containerProperties"]["linuxParameters"][
"sharedMemorySize"
] = int(float(shared_memory))
if swappiness is not None:
if not (
isinstance(swappiness, (int, unicode, basestring))
and int(swappiness) >= 0
and int(swappiness) < 100
):
raise BatchJobException(
"Invalid swappiness value ({}); "
"(should be 0 or greater and less than 100)".format(swappiness)
)
else:
job_definition["containerProperties"]["linuxParameters"][
"swappiness"
] = int(swappiness)
if max_swap is not None:
if not (
isinstance(max_swap, (int, unicode, basestring))
and int(max_swap) >= 0
):
raise BatchJobException(
"Invalid swappiness value ({}); "
"(should be 0 or greater)".format(max_swap)
)
else:
job_definition["containerProperties"]["linuxParameters"][
"maxSwap"
] = int(max_swap)
if ephemeral_storage:
raise BatchJobException(
"The ephemeral_storage parameter is only available for FARGATE compute environments"
)
if inferentia:
if not (isinstance(inferentia, (int, unicode, basestring))):
raise BatchJobException(
"Invalid inferentia value: ({}) (should be 0 or greater)".format(
inferentia
)
)
else:
job_definition["containerProperties"]["linuxParameters"]["devices"] = []
for i in range(int(inferentia)):
job_definition["containerProperties"]["linuxParameters"][
"devices"
].append(
{
"containerPath": "/dev/neuron{}".format(i),
"hostPath": "/dev/neuron{}".format(i),
"permissions": ["READ", "WRITE"],
}
)
if host_volumes or efs_volumes:
job_definition["containerProperties"]["volumes"] = []
job_definition["containerProperties"]["mountPoints"] = []
if host_volumes:
if isinstance(host_volumes, str):
host_volumes = [host_volumes]
for host_path in host_volumes:
container_path = host_path
if ":" in host_path:
host_path, container_path = host_path.split(":", 1)
name = host_path.replace("/", "_").replace(".", "_")
job_definition["containerProperties"]["volumes"].append(
{"name": name, "host": {"sourcePath": host_path}}
)
job_definition["containerProperties"]["mountPoints"].append(
{"sourceVolume": name, "containerPath": container_path}
)
if efs_volumes:
if isinstance(efs_volumes, str):
efs_volumes = [efs_volumes]
for efs_id in efs_volumes:
container_path = "/mnt/" + efs_id
if ":" in efs_id:
efs_id, container_path = efs_id.split(":", 1)
name = "efs_" + efs_id
job_definition["containerProperties"]["volumes"].append(
{
"name": name,
"efsVolumeConfiguration": {
"fileSystemId": efs_id,
"transitEncryption": "ENABLED",
},
}
)
job_definition["containerProperties"]["mountPoints"].append(
{"sourceVolume": name, "containerPath": container_path}
)
if use_tmpfs and (platform == "FARGATE" or platform == "FARGATE_SPOT"):
raise BatchJobException(
"tmpfs is not available for Fargate compute resources"
)
if use_tmpfs or (tmpfs_size and not use_tmpfs):
if tmpfs_size:
if not (isinstance(tmpfs_size, (int, unicode, basestring))):
raise BatchJobException(
"Invalid tmpfs value: ({}) (should be 0 or greater)".format(
tmpfs_size
)
)
else:
# default tmpfs behavior - https://man7.org/linux/man-pages/man5/tmpfs.5.html
tmpfs_size = int(float(memory)) / 2
job_definition["containerProperties"]["linuxParameters"]["tmpfs"] = [
{
"containerPath": tmpfs_path,
"size": int(tmpfs_size),
"mountOptions": [
# should map to rw, suid, dev, exec, auto, nouser, and async
"defaults"
],
}
]
if efa:
if not (isinstance(efa, (int, unicode, basestring))):
raise BatchJobException(
"Invalid efa value: ({}) (should be 0 or greater)".format(efa)
)
else:
if "linuxParameters" not in job_definition["containerProperties"]:
job_definition["containerProperties"]["linuxParameters"] = {}
if (
"devices"
not in job_definition["containerProperties"]["linuxParameters"]
):
job_definition["containerProperties"]["linuxParameters"][
"devices"
] = []
if (num_parallel or 0) > 1:
# Multi-node parallel jobs require the container path and permissions explicitly specified in Job definition
for i in range(int(efa)):
job_definition["containerProperties"]["linuxParameters"][
"devices"
].append(
{
"hostPath": "/dev/infiniband/uverbs{}".format(i),
"containerPath": "/dev/infiniband/uverbs{}".format(i),
"permissions": ["READ", "WRITE", "MKNOD"],
}
)
else:
# Single-node container jobs only require host path in job definition
job_definition["containerProperties"]["linuxParameters"][
"devices"
].append({"hostPath": "/dev/infiniband/uverbs0"})
self.num_parallel = num_parallel or 0
if self.num_parallel >= 1:
job_definition["type"] = "multinode"
job_definition["nodeProperties"] = {
"numNodes": self.num_parallel,
"mainNode": 0,
}
job_definition["nodeProperties"]["nodeRangeProperties"] = [
{
"targetNodes": "0:0", # The properties are same for main node and others,
# but as we use nodeOverrides later for main and others
# differently, also the job definition must match those patterns
"container": job_definition["containerProperties"],
},
]
if self.num_parallel > 1:
job_definition["nodeProperties"]["nodeRangeProperties"].append(
{
"targetNodes": "1:{}".format(self.num_parallel - 1),
"container": job_definition["containerProperties"],
}
)
del job_definition["containerProperties"] # not used for multi-node
# check if job definition already exists
def_name = (
"metaflow_%s"
% hashlib.sha224(str(job_definition).encode("utf-8")).hexdigest()
)
payload = {"jobDefinitionName": def_name, "status": "ACTIVE"}
response = self._client.describe_job_definitions(**payload)
if len(response["jobDefinitions"]) > 0:
return response["jobDefinitions"][0]["jobDefinitionArn"]
# else create a job definition
job_definition["jobDefinitionName"] = def_name
try:
response = self._client.register_job_definition(**job_definition)
except Exception as ex:
if type(ex).__name__ == "ParamValidationError" and (
platform == "FARGATE" or platform == "FARGATE_SPOT"
):
raise BatchJobException(
"%s \nPlease ensure you have installed boto3>=1.16.29 if "
"you intend to launch AWS Batch jobs on AWS Fargate "
"compute platform." % ex
)
else:
raise ex
return response["jobDefinitionArn"]
def job_def(
    self,
    image,
    iam_role,
    job_queue,
    execution_role,
    shared_memory,
    max_swap,
    swappiness,
    inferentia,
    efa,
    memory,
    host_volumes,
    efs_volumes,
    use_tmpfs,
    tmpfs_tempdir,
    tmpfs_size,
    tmpfs_path,
    num_parallel,
    ephemeral_storage,
    log_driver,
    log_options,
    privileged,
):
    """
    Resolve the job definition via `_register_job_definition` (all arguments
    are forwarded positionally, in order) and store the result in
    payload["jobDefinition"]. Returns self for chaining.
    """
    definition = self._register_job_definition(
        image,
        iam_role,
        job_queue,
        execution_role,
        shared_memory,
        max_swap,
        swappiness,
        inferentia,
        efa,
        memory,
        host_volumes,
        efs_volumes,
        use_tmpfs,
        tmpfs_tempdir,
        tmpfs_size,
        tmpfs_path,
        num_parallel,
        ephemeral_storage,
        log_driver,
        log_options,
        privileged,
    )
    self.payload["jobDefinition"] = definition
    return self
# Fluent setters: each one records a single value and returns self so that
# calls can be chained.

def job_name(self, job_name):
    """Store the job name in payload["jobName"]."""
    self.payload["jobName"] = job_name
    return self


def job_queue(self, job_queue):
    """Store the target queue in payload["jobQueue"]."""
    self.payload["jobQueue"] = job_queue
    return self


def image(self, image):
    """Remember the container image in `_image`."""
    self._image = image
    return self


def task_id(self, task_id):
    """Remember the task id in `_task_id`."""
    self._task_id = task_id
    return self


def iam_role(self, iam_role):
    """Remember the IAM role in `_iam_role`."""
    self._iam_role = iam_role
    return self


def execution_role(self, execution_role):
    """Remember the execution role in `_execution_role`."""
    self._execution_role = execution_role
    return self


def shared_memory(self, shared_memory):
    """Remember the shared-memory setting in `_shared_memory`."""
    self._shared_memory = shared_memory
    return self


def max_swap(self, max_swap):
    """Remember the max-swap setting in `_max_swap`."""
    self._max_swap = max_swap
    return self


def swappiness(self, swappiness):
    """Remember the swappiness setting in `_swappiness`."""
    self._swappiness = swappiness
    return self


def inferentia(self, inferentia):
    """Remember the inferentia setting in `_inferentia`."""
    self._inferentia = inferentia
    return self


def efa(self, efa):
    """Remember the EFA setting in `_efa`."""
    self._efa = efa
    return self
def command(self, command):
    """Append the items of `command` to the container's command list; returns self."""
    overrides = self.payload["containerOverrides"]
    if "command" not in overrides:
        overrides["command"] = []
    overrides["command"] += command
    return self
def cpu(self, cpu):
    """
    Add a VCPU resource requirement; returns self.

    Raises BatchJobException unless `cpu` is an int, float or string whose
    float value is greater than 0.
    """
    acceptable = isinstance(cpu, (int, unicode, basestring, float)) and float(cpu) > 0
    if not acceptable:
        raise BatchJobException(
            "Invalid CPU value ({}); it should be greater than 0".format(cpu)
        )
    overrides = self.payload["containerOverrides"]
    if "resourceRequirements" not in overrides:
        overrides["resourceRequirements"] = []
    # %g will format the value without .0 if it doesn't have a fractional part
    #
    # While AWS Batch supports fractional values for fargate, it does not
    # seem to like seeing values like 2.0 for non-fargate environments.
    requirement = {"value": "%g" % (float(cpu)), "type": "VCPU"}
    overrides["resourceRequirements"].append(requirement)
    return self
def memory(self, mem):
    """
    Add a MEMORY resource requirement, truncated to a whole number; returns self.

    Raises BatchJobException unless `mem` is an int, float or string whose
    float value is greater than 0.
    """
    acceptable = isinstance(mem, (int, unicode, basestring, float)) and float(mem) > 0
    if not acceptable:
        raise BatchJobException(
            "Invalid memory value ({}); it should be greater than 0".format(mem)
        )
    overrides = self.payload["containerOverrides"]
    if "resourceRequirements" not in overrides:
        overrides["resourceRequirements"] = []
    requirement = {"value": str(int(float(mem))), "type": "MEMORY"}
    overrides["resourceRequirements"].append(requirement)
    return self
def gpu(self, gpu):
    """
    Add a GPU resource requirement when the requested count is positive;
    returns self. A count that is not greater than 0 adds nothing.

    Raises BatchJobException unless `gpu` is an int or a string.
    """
    if not (isinstance(gpu, (int, unicode, basestring))):
        raise BatchJobException(
            "invalid gpu value: ({}) (should be 0 or greater)".format(gpu)
        )
    if not float(gpu) > 0:
        return self
    overrides = self.payload["containerOverrides"]
    if "resourceRequirements" not in overrides:
        overrides["resourceRequirements"] = []
    # Only integer values are supported but the value passed to us
    # could be a float-converted-to-string
    overrides["resourceRequirements"].append(
        {"type": "GPU", "value": str(int(float(gpu)))}
    )
    return self
def environment_variable(self, name, value):
    """
    Append an environment variable to the container overrides; returns self.

    A `None` value is ignored. Every other value is stringified. Values that
    start with "$$." or "$." are stored under the key "value.$" instead of
    "value".
    """
    if value is None:
        return self
    overrides = self.payload["containerOverrides"]
    if "environment" not in overrides:
        overrides["environment"] = []
    text = str(value)
    # Context Object substitution for AWS Step Functions
    # https://docs.aws.amazon.com/step-functions/latest/dg/input-output-contextobject.html
    value_key = "value.$" if text.startswith(("$$.", "$.")) else "value"
    overrides["environment"].append({"name": name, value_key: text})
    return self
# Fluent setters that write directly into the submit payload; each returns
# self so that calls can be chained.

def timeout_in_secs(self, timeout_in_secs):
    """Set payload["timeout"]["attemptDurationSeconds"]."""
    self.payload["timeout"]["attemptDurationSeconds"] = timeout_in_secs
    return self


def tag(self, key, value):
    """Set one entry of payload["tags"]; the value is stringified."""
    self.payload["tags"][key] = str(value)
    return self


def parameter(self, key, value):
    """Set one entry of payload["parameters"]; the value is stringified."""
    self.payload["parameters"][key] = str(value)
    return self


def attempts(self, attempts):
    """Set payload["retryStrategy"]["attempts"]."""
    self.payload["retryStrategy"]["attempts"] = attempts
    return self
class Throttle(object):
    """
    Decorator that rate-limits calls to the wrapped function and backs off
    when it raises TriableException.

    The wrapped function only runs if more than the current wait time has
    passed since the last run; otherwise the call is a no-op. The wrapper
    always returns None. The throttle state lives on the Throttle instance,
    so it is shared by every caller of the decorated function.
    """

    def __init__(self, delta_in_secs=1, num_tries=20):
        self.delta_in_secs = delta_in_secs
        self.num_tries = num_tries
        self._now = None
        self._reset()

    def _reset(self):
        """Restore the full retry budget and the base wait time."""
        self._tries_left = self.num_tries
        self._wait = self.delta_in_secs

    def __call__(self, func):
        def wrapped(*args, **kwargs):
            now = time.time()
            # Still inside the wait window of the previous run: skip.
            if self._now is not None and not (now - self._now > self._wait):
                return
            self._now = now
            try:
                func(*args, **kwargs)
            except TriableException as ex:
                self._tries_left -= 1
                if self._tries_left == 0:
                    # Budget exhausted: surface the original error.
                    raise ex.ex
                # Grow the wait with the number of failures, plus jitter.
                failures = self.num_tries - self._tries_left
                jitter = random.randint(0, 3 * self.delta_in_secs)
                self._wait = (self.delta_in_secs * 1.2) ** failures + jitter
            else:
                self._reset()

        return wrapped
class TriableException(Exception):
    """
    Marks an underlying error as retryable.

    Parameters
    ----------
    ex : Exception
        The original error. It is kept on `self.ex` so that the caller can
        re-raise it once retries are exhausted.
    """

    def __init__(self, ex):
        # Forward to Exception so that `args`, `str()`/`repr()` and
        # copy/pickle reconstruction all carry the wrapped error instead of
        # being empty.
        super(TriableException, self).__init__(ex)
        self.ex = ex
class RunningJob(object):
    """
    Handle on a submitted AWS Batch job.

    The job description is fetched through `client.describe_jobs` and cached
    in `self._data`; the properties below read from that cache and refresh
    it while the job has not stopped.

    Parameters
    ----------
    id : str
        AWS Batch job id.
    client
        Client object exposing `describe_jobs`, `terminate_job` and
        `exceptions.ClientError`.
    """

    NUM_RETRIES = 8

    def __init__(self, id, client):
        self._id = id
        self._client = client
        self._data = {}

    def __repr__(self):
        return "{}('{}')".format(self.__class__.__name__, self._id)

    def _apply(self, data):
        """Replace the cached job description."""
        self._data = data

    @Throttle()
    def _update(self):
        """
        Refresh the cached description with one `describe_jobs` call.

        HTTP 429 and 5xx responses are wrapped in `TriableException` so that
        the `Throttle` decorator backs off and retries; any other client
        error is re-raised as is.
        """
        try:
            data = self._client.describe_jobs(jobs=[self._id])
        except self._client.exceptions.ClientError as err:
            code = err.response["ResponseMetadata"]["HTTPStatusCode"]
            if code == 429 or code >= 500:
                raise TriableException(err)
            raise err
        # There have been sporadic reports of empty responses to the
        # batch.describe_jobs API call, which can potentially happen if the
        # batch.submit_job API call is not strongly consistent.
        # We add a check here to guard against that. The `update()` call
        # will ensure that we poll `batch.describe_jobs` until we get a
        # satisfactory response at least once throughout the lifecycle of
        # the job.
        if len(data["jobs"]) == 1:
            self._apply(data["jobs"][0])

    def update(self):
        """Refresh the cache, polling until a description has been received; returns self."""
        self._update()
        while not self._data:
            self._update()
        return self

    @property
    def id(self):
        return self._id

    @property
    def info(self):
        """Cached job description, fetched first if nothing is cached yet."""
        if not self._data:
            self.update()
        return self._data

    @property
    def job_name(self):
        return self.info["jobName"]

    @property
    def job_queue(self):
        return self.info["jobQueue"]

    @property
    def status(self):
        if not self.is_done:
            self.update()
        return self.info["status"]

    @property
    def status_reason(self):
        return self.info.get("statusReason")

    @property
    def created_at(self):
        return self.info["createdAt"]

    @property
    def stopped_at(self):
        """`stoppedAt` from the description, or 0 while it is absent."""
        return self.info.get("stoppedAt", 0)

    @property
    def is_done(self):
        if self.stopped_at == 0:
            self.update()
        return self.stopped_at > 0

    @property
    def is_running(self):
        return self.status == "RUNNING"

    @property
    def is_successful(self):
        return self.status == "SUCCEEDED"

    @property
    def is_crashed(self):
        # TODO: Check statusmessage to find if the job crashed instead of failing
        return self.status == "FAILED"

    @property
    def reason(self):
        """Container-level reason (single-node) or job status reason (multinode), if any."""
        if "container" in self.info:
            # single-node job
            return self.info["container"].get("reason")
        else:
            # multinode
            # `statusReason` is not always present in the description; use
            # `.get` (as `status_reason` does) so that a missing field
            # yields None instead of a KeyError.
            return self.info.get("statusReason")

    @property
    def status_code(self):
        """Exit code of the container (for multinode jobs, of the last attempt's container)."""
        if not self.is_done:
            self.update()
        if "container" in self.info:
            return self.info["container"].get("exitCode")
        else:
            # multinode
            return self.info["attempts"][-1]["container"].get("exitCode")

    def kill(self):
        """Ask AWS Batch to terminate the job if it has not stopped; returns the refreshed handle."""
        if not self.is_done:
            self._client.terminate_job(
                jobId=self._id, reason="Metaflow initiated job termination."
            )
        return self.update()
================================================
FILE: metaflow/plugins/aws/batch/batch_decorator.py
================================================
import os
import platform
import sys
import time
from metaflow import R, current
from metaflow.decorators import StepDecorator
from metaflow.metadata_provider import MetaDatum
from metaflow.metadata_provider.util import sync_local_metadata_to_datastore
from metaflow.metaflow_config import (
BATCH_CONTAINER_IMAGE,
BATCH_CONTAINER_REGISTRY,
BATCH_DEFAULT_TAGS,
BATCH_JOB_QUEUE,
DATASTORE_LOCAL_DIR,
ECS_FARGATE_EXECUTION_ROLE,
ECS_S3_ACCESS_IAM_ROLE,
FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
)
from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
from metaflow.sidecar import Sidecar
from metaflow.unbounded_foreach import UBF_CONTROL
from ..aws_utils import (
compute_resource_attributes,
get_docker_registry,
get_ec2_instance_metadata,
validate_aws_tag,
)
from .batch import BatchException
class BatchDecorator(StepDecorator):
    """
    Specifies that this step should execute on [AWS Batch](https://aws.amazon.com/batch/).

    Parameters
    ----------
    cpu : int, default 1
        Number of CPUs required for this step. If `@resources` is
        also present, the maximum value from all decorators is used.
    gpu : int, default 0
        Number of GPUs required for this step. If `@resources` is
        also present, the maximum value from all decorators is used.
    memory : int, default 4096
        Memory size (in MB) required for this step. If
        `@resources` is also present, the maximum value from all decorators is
        used.
    image : str, optional, default None
        Docker image to use when launching on AWS Batch. If not specified, and
        METAFLOW_BATCH_CONTAINER_IMAGE is specified, that image is used. If
        not, a default Docker image mapping to the current version of Python is used.
    queue : str, default METAFLOW_BATCH_JOB_QUEUE
        AWS Batch Job Queue to submit the job to.
    iam_role : str, default METAFLOW_ECS_S3_ACCESS_IAM_ROLE
        AWS IAM role that AWS Batch container uses to access AWS cloud resources.
    execution_role : str, default METAFLOW_ECS_FARGATE_EXECUTION_ROLE
        AWS IAM role that AWS Batch can use [to trigger AWS Fargate tasks]
        (https://docs.aws.amazon.com/batch/latest/userguide/execution-IAM-role.html).
    shared_memory : int, optional, default None
        The value for the size (in MiB) of the /dev/shm volume for this step.
        This parameter maps to the `--shm-size` option in Docker.
    max_swap : int, optional, default None
        The total amount of swap memory (in MiB) a container can use for this
        step. This parameter is translated to the `--memory-swap` option in
        Docker where the value is the sum of the container memory plus the
        `max_swap` value.
    swappiness : int, optional, default None
        This allows you to tune memory swappiness behavior for this step.
        A swappiness value of 0 causes swapping not to happen unless absolutely
        necessary. A swappiness value of 100 causes pages to be swapped very
        aggressively. Accepted values are whole numbers between 0 and 100.
    aws_batch_tags: Dict[str, str], optional, default None
        Sets arbitrary AWS tags on the AWS Batch compute environment.
        Set as string key-value pairs.
    use_tmpfs : bool, default False
        This enables an explicit tmpfs mount for this step. Note that tmpfs is
        not available on Fargate compute environments
    tmpfs_tempdir : bool, default True
        sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
    tmpfs_size : int, optional, default None
        The value for the size (in MiB) of the tmpfs mount for this step.
        This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
        memory allocated for this step.
    tmpfs_path : str, optional, default None
        Path to tmpfs mount for this step. Defaults to /metaflow_temp.
    inferentia : int, default 0
        Number of Inferentia chips required for this step.
    trainium : int, default None
        Alias for inferentia. Use only one of the two.
    efa : int, default 0
        Number of elastic fabric adapter network devices to attach to container
    ephemeral_storage : int, default None
        The total amount, in GiB, of ephemeral storage to set for the task, 21-200GiB.
        This is only relevant for Fargate compute environments
    log_driver: str, optional, default None
        The log driver to use for the Amazon ECS container.
    log_options: List[str], optional, default None
        List of strings containing options for the chosen log driver. The configurable values
        depend on the `log driver` chosen. Validation of these options is not supported yet.
        Example: [`awslogs-group:aws/batch/job`]
    privileged: bool, default False
        Control whether the task can run as a privileged process on AWS Batch
    """

    name = "batch"
    defaults = {
        "cpu": None,
        "gpu": None,
        "memory": None,
        "image": None,
        "queue": BATCH_JOB_QUEUE,
        "iam_role": ECS_S3_ACCESS_IAM_ROLE,
        "execution_role": ECS_FARGATE_EXECUTION_ROLE,
        "shared_memory": None,
        "max_swap": None,
        "swappiness": None,
        "inferentia": None,
        "trainium": None,  # alias for inferentia
        "efa": None,
        "host_volumes": None,
        "efs_volumes": None,
        "use_tmpfs": False,
        "aws_batch_tags": None,
        "tmpfs_tempdir": True,
        "tmpfs_size": None,
        "tmpfs_path": "/metaflow_temp",
        "ephemeral_storage": None,
        "log_driver": None,
        "log_options": None,
        "executable": None,
        "privileged": False,
    }
    # Fallback values handed to compute_resource_attributes in step_init.
    resource_defaults = {
        "cpu": "1",
        "gpu": "0",
        "memory": "4096",
    }
    # Code package coordinates; stored on the class by _save_package_once.
    package_metadata = None
    package_url = None
    package_sha = None
    run_time_limit = None

    # Conda environment support
    supports_conda_environment = True
    target_platform = "linux-64"

    def init(self):
        """
        Normalise decorator attributes: pick the container image, resolve
        the `trainium` alias, and validate/merge AWS Batch tags.

        Raises
        ------
        BatchException
            If both `inferentia` and `trainium` are set, or if
            BATCH_DEFAULT_TAGS / `aws_batch_tags` is not a Dict[str, str].
        """
        # If no docker image is explicitly specified, impute a default image.
        if not self.attributes["image"]:
            # If metaflow-config specifies a docker image, just use that.
            if BATCH_CONTAINER_IMAGE:
                self.attributes["image"] = BATCH_CONTAINER_IMAGE
            # If metaflow-config doesn't specify a docker image, assign a
            # default docker image.
            else:
                # Metaflow-R has its own default docker image (rocker family)
                if R.use_r():
                    self.attributes["image"] = R.container_image()
                # Default to vanilla Python image corresponding to major.minor
                # version of the Python interpreter launching the flow.
                else:
                    self.attributes["image"] = "python:%s.%s" % (
                        platform.python_version_tuple()[0],
                        platform.python_version_tuple()[1],
                    )
        # Assign docker registry URL for the image.
        if not get_docker_registry(self.attributes["image"]):
            if BATCH_CONTAINER_REGISTRY:
                self.attributes["image"] = "%s/%s" % (
                    BATCH_CONTAINER_REGISTRY.rstrip("/"),
                    self.attributes["image"],
                )

        # Alias trainium to inferentia and check that both are not in use.
        if (
            self.attributes["inferentia"] is not None
            and self.attributes["trainium"] is not None
        ):
            raise BatchException(
                "only specify a value for 'inferentia' or 'trainium', not both."
            )
        if self.attributes["trainium"] is not None:
            self.attributes["inferentia"] = self.attributes["trainium"]

        # The value is invalid when it is not a dict OR when any key/value
        # is not a string. (`or` short-circuits, so `.items()` is only
        # reached for an actual dict.)
        if not isinstance(BATCH_DEFAULT_TAGS, dict) or not all(
            isinstance(k, str) and isinstance(v, str)
            for k, v in BATCH_DEFAULT_TAGS.items()
        ):
            raise BatchException(
                "BATCH_DEFAULT_TAGS environment variable must be Dict[str, str]"
            )

        if self.attributes["aws_batch_tags"] is not None:
            if not isinstance(self.attributes["aws_batch_tags"], dict) or not all(
                isinstance(k, str) and isinstance(v, str)
                for k, v in self.attributes["aws_batch_tags"].items()
            ):
                raise BatchException("aws_batch_tags must be Dict[str, str]")
        else:
            self.attributes["aws_batch_tags"] = {}

        if BATCH_DEFAULT_TAGS:
            # Tags set on the decorator override the configured defaults.
            self.attributes["aws_batch_tags"] = {
                **BATCH_DEFAULT_TAGS,
                **self.attributes["aws_batch_tags"],
            }

        # clean up the alias attribute so it is not passed on.
        self.attributes.pop("trainium", None)

    # Refer https://github.com/Netflix/metaflow/blob/master/docs/lifecycle.png
    # to understand where these functions are invoked in the lifecycle of a
    # Metaflow flow.
    def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
        """
        Validate the step's configuration for AWS Batch and record state
        needed later.

        Raises
        ------
        BatchException
            If the datastore is not S3, the run time limit is below 60
            seconds, or `tmpfs_path` is not absolute.
        """
        if flow_datastore.TYPE != "s3":
            raise BatchException("The *@batch* decorator requires --datastore=s3.")

        # Set internal state.
        self.logger = logger
        self.environment = environment
        self.step = step
        self.flow_datastore = flow_datastore

        self.attributes.update(
            compute_resource_attributes(decos, self, self.resource_defaults)
        )

        # Set run time limit for the AWS Batch job.
        self.run_time_limit = get_run_time_limit_for_task(decos)
        if self.run_time_limit < 60:
            raise BatchException(
                "The timeout for step *{step}* should be at "
                "least 60 seconds for execution on AWS Batch.".format(step=step)
            )

        # Validate tmpfs_path. Batch requires this to be an absolute path
        if self.attributes["tmpfs_path"] and self.attributes["tmpfs_path"][0] != "/":
            raise BatchException("'tmpfs_path' needs to be an absolute path")

        # Validate Batch tags
        if self.attributes["aws_batch_tags"]:
            for key, val in self.attributes["aws_batch_tags"].items():
                validate_aws_tag(key, val)

    def runtime_init(self, flow, graph, package, run_id):
        # Set some more internal state.
        self.flow = flow
        self.graph = graph
        self.package = package
        self.run_id = run_id

    def runtime_task_created(
        self, task_datastore, task_id, split_index, input_paths, is_cloned, ubf_context
    ):
        # Cloned tasks are not executed, so they do not need the code package.
        if not is_cloned:
            self._save_package_once(self.flow_datastore, self.package)

    def runtime_step_cli(
        self, cli_args, retry_count, max_user_code_retries, ubf_context
    ):
        """Rewrite the step's CLI invocation into a `batch step` command."""
        if retry_count <= max_user_code_retries:
            # after all attempts to run the user code have failed, we don't need
            # to execute on AWS Batch anymore. We can execute possible fallback
            # code locally.
            cli_args.commands = ["batch", "step"]
            cli_args.command_args.append(self.package_metadata)
            cli_args.command_args.append(self.package_sha)
            cli_args.command_args.append(self.package_url)
            # skip certain keys as CLI arguments
            _skip_keys = ["aws_batch_tags"]
            cli_args.command_options.update(
                {k: v for k, v in self.attributes.items() if k not in _skip_keys}
            )
            cli_args.command_options["run-time-limit"] = self.run_time_limit
            # Pass the supplied AWS batch tags to the step CLI cmd
            cli_args.command_options["aws-batch-tag"] = [
                "%s=%s" % (k, v) for k, v in self.attributes["aws_batch_tags"].items()
            ]
            if not R.use_r():
                cli_args.entrypoint[0] = sys.executable

    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_retries,
        ubf_context,
        inputs,
    ):
        """
        Collect AWS Batch execution metadata, start the sidecars and set up
        the multinode environment before the user code runs.
        """
        self.metadata = metadata
        self.task_datastore = task_datastore

        # current.tempdir reflects the value of METAFLOW_TEMPDIR (the current working
        # directory by default), or the value of tmpfs_path if tmpfs_tempdir=False.
        if not self.attributes["tmpfs_tempdir"]:
            current._update_env({"tempdir": self.attributes["tmpfs_path"]})

        # task_pre_step may run locally if fallback is activated for @catch
        # decorator. In that scenario, we skip collecting AWS Batch execution
        # metadata. A rudimentary way to detect non-local execution is to
        # check for the existence of AWS_BATCH_JOB_ID environment variable.
        meta = {}
        if "AWS_BATCH_JOB_ID" in os.environ:
            meta["aws-batch-job-id"] = os.environ["AWS_BATCH_JOB_ID"]
            meta["aws-batch-job-attempt"] = os.environ["AWS_BATCH_JOB_ATTEMPT"]
            meta["aws-batch-ce-name"] = os.environ["AWS_BATCH_CE_NAME"]
            meta["aws-batch-jq-name"] = os.environ["AWS_BATCH_JQ_NAME"]
            meta["aws-batch-execution-env"] = os.environ["AWS_EXECUTION_ENV"]

            # Capture AWS Logs metadata. This is best-effort only since
            # only V4 of the metadata uri for the ECS container hosts this
            # information, and it is quite likely that not all consumers of
            # Metaflow would be running the container agent compatible with
            # version V4.
            # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html
            # TODO: Remove dependency on requests
            import requests

            try:
                logs_meta = (
                    requests.get(url=os.environ["ECS_CONTAINER_METADATA_URI_V4"])
                    .json()
                    .get("LogOptions", {})
                )
                meta["aws-batch-awslogs-group"] = logs_meta.get("awslogs-group")
                meta["aws-batch-awslogs-region"] = logs_meta.get("awslogs-region")
                meta["aws-batch-awslogs-stream"] = logs_meta.get("awslogs-stream")
            except Exception:
                pass

            instance_meta = get_ec2_instance_metadata()
            meta.update(instance_meta)

            self._save_logs_sidecar = Sidecar("save_logs_periodically")
            self._save_logs_sidecar.start()

            # Start spot termination monitor sidecar.
            current._update_env(
                {"spot_termination_notice": "/tmp/spot_termination_notice"}
            )
            self._spot_monitor_sidecar = Sidecar("spot_termination_monitor")
            self._spot_monitor_sidecar.start()

        num_parallel = int(os.environ.get("AWS_BATCH_JOB_NUM_NODES", 0))
        if num_parallel >= 1 and ubf_context == UBF_CONTROL:
            # UBF handling for multinode case
            control_task_id = current.task_id
            top_task_id = control_task_id.replace("control-", "")  # chop "-0"
            mapper_task_ids = [control_task_id] + [
                "%s-node-%d" % (top_task_id, node_idx)
                for node_idx in range(1, num_parallel)
            ]
            flow._control_mapper_tasks = [
                "%s/%s/%s" % (run_id, step_name, mapper_task_id)
                for mapper_task_id in mapper_task_ids
            ]
            flow._control_task_is_mapper_zero = True

        if num_parallel >= 1:
            _setup_multinode_environment()
            # current.parallel.node_index will be correctly available over here.
            meta.update({"parallel-node-index": current.parallel.node_index})

        if len(meta) > 0:
            entries = [
                MetaDatum(
                    field=k,
                    value=v,
                    type=k,
                    tags=["attempt_id:{0}".format(retry_count)],
                )
                for k, v in meta.items()
            ]
            # Register book-keeping metadata for debugging.
            metadata.register_metadata(run_id, step_name, task_id, entries)

    def task_finished(
        self, step_name, flow, graph, is_task_ok, retry_count, max_retries
    ):
        """
        Sync local metadata, stop the sidecars and, for a successful
        multinode control task, wait for the secondary tasks.
        """
        # task_finished may run locally if fallback is activated for @catch
        # decorator.
        if "AWS_BATCH_JOB_ID" in os.environ:
            # If `local` metadata is configured, we would need to copy task
            # execution metadata from the AWS Batch container to user's
            # local file system after the user code has finished execution.
            # This happens via datastore as a communication bridge.
            if hasattr(self, "metadata") and self.metadata.TYPE == "local":
                # Note that the datastore is *always* Amazon S3 (see
                # runtime_task_created function).
                sync_local_metadata_to_datastore(
                    DATASTORE_LOCAL_DIR, self.task_datastore
                )

        # Terminate each sidecar independently so that a failure (or a
        # missing sidecar, e.g. when running locally) on one does not
        # prevent the other from being stopped.
        for sidecar_attr in ("_save_logs_sidecar", "_spot_monitor_sidecar"):
            try:
                getattr(self, sidecar_attr).terminate()
            except Exception:
                # Best effort kill
                pass

        if is_task_ok and len(getattr(flow, "_control_mapper_tasks", [])) > 1:
            self._wait_for_mapper_tasks(flow, step_name)

    def _wait_for_mapper_tasks(self, flow, step_name):
        """
        When launching multinode task with UBF, need to wait for the secondary
        tasks to finish cleanly and produce their output before exiting the
        main task. Otherwise, the main task finishing will cause secondary nodes
        to terminate immediately, and possibly prematurely.

        Returns True once every task of the step reports a finish time;
        raises Exception if that does not happen within TIMEOUT seconds.
        """
        from metaflow import Step  # avoid circular dependency

        TIMEOUT = 600
        last_completion_timeout = time.time() + TIMEOUT
        print("Waiting for batch secondary tasks to finish")
        while last_completion_timeout > time.time():
            time.sleep(2)
            try:
                step_path = "%s/%s/%s" % (flow.name, current.run_id, step_name)
                tasks = [task for task in Step(step_path)]
                if len(tasks) == len(flow._control_mapper_tasks):
                    if all(
                        task.finished_at is not None for task in tasks
                    ):  # for some reason task.finished fails
                        return True
                else:
                    print(
                        "Waiting for all parallel tasks to finish. Finished: {}/{}".format(
                            len(tasks),
                            len(flow._control_mapper_tasks),
                        )
                    )
            except Exception:
                # Lookups may fail transiently; keep polling until the deadline.
                pass
        raise Exception(
            "Batch secondary workers did not finish in %s seconds" % TIMEOUT
        )

    @classmethod
    def _save_package_once(cls, flow_datastore, package):
        """Record the code package URL/SHA/metadata on the class, at most once."""
        if cls.package_url is None:
            if not FEAT_ALWAYS_UPLOAD_CODE_PACKAGE:
                cls.package_url, cls.package_sha = flow_datastore.save_data(
                    [package.blob], len_hint=1
                )[0]
                cls.package_metadata = package.package_metadata
            else:
                # Blocks until the package is uploaded
                cls.package_url = package.package_url()
                cls.package_sha = package.package_sha()
                cls.package_metadata = package.package_metadata
def _setup_multinode_environment():
# setup the multinode environment variables.
import socket
if "AWS_BATCH_JOB_MAIN_NODE_PRIVATE_IPV4_ADDRESS" not in os.environ:
# we are the main node
local_ips = socket.gethostbyname_ex(socket.gethostname())[-1]
assert local_ips, "Could not find local ip address"
os.environ["MF_PARALLEL_MAIN_IP"] = local_ips[0]
else:
os.environ["MF_PARALLEL_MAIN_IP"] = os.environ[
"AWS_BATCH_JOB_MAIN_NODE_PRIVATE_IPV4_ADDRESS"
]
os.environ["MF_PARALLEL_NUM_NODES"] = os.environ["AWS_BATCH_JOB_NUM_NODES"]
os.environ["MF_PARALLEL_NODE_INDEX"] = os.environ["AWS_BATCH_JOB_NODE_INDEX"]
================================================
FILE: metaflow/plugins/aws/secrets_manager/__init__.py
================================================
================================================
FILE: metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py
================================================
import base64
import json
from json import JSONDecodeError
from metaflow.exception import MetaflowException
from metaflow.metaflow_config import (
AWS_SECRETS_MANAGER_DEFAULT_REGION,
AWS_SECRETS_MANAGER_DEFAULT_ROLE,
)
from metaflow.plugins.secrets import SecretsProvider
import re
class MetaflowAWSSecretsManagerBadResponse(MetaflowException):
    """
    Raised when the response from AWS Secrets Manager is not valid in some way:
    the 'Name' field is missing, or neither 'SecretString' nor 'SecretBinary'
    is present.
    """
class MetaflowAWSSecretsManagerDuplicateKey(MetaflowException):
    """
    Raised when the response from AWS Secrets Manager contains duplicate keys,
    i.e. two keys that map to the same name once sanitized as environment
    variable names.
    """
class MetaflowAWSSecretsManagerJSONParseError(MetaflowException):
    """
    Raised when the SecretString response from AWS Secrets Manager is not valid
    JSON while JSON parsing is enabled (the `json` option, on by default).
    """
class MetaflowAWSSecretsManagerNotJSONObject(MetaflowException):
    """
    Raised when the SecretString response from AWS Secrets Manager parses as
    JSON but its top-level value is not an object (dictionary).
    """
def _sanitize_key_as_env_var(key):
"""
Sanitize a key as an environment variable name.
This is purely a convenience trade-off to cover common cases well, vs. introducing
ambiguities (e.g. did the final '_' come from '.', or '-' or is original?).
1/27/2023(jackie):
We start with few rules and should *sparingly* add more over time.
Also, it's TBD whether all possible providers will share the same sanitization logic.
Therefore we will keep this function private for now
"""
return key.replace("-", "_").replace(".", "_").replace("/", "_")
class AwsSecretsManagerSecretsProvider(SecretsProvider):
    TYPE = "aws-secrets-manager"

    def get_secret_as_dict(self, secret_id, options={}, role=None):
        """
        Fetch one secret from AWS Secrets Manager and expose it as a dict of
        environment variables.

        AWS returns the payload EITHER as text ("SecretString") OR as bytes
        ("SecretBinary").

        Text payload:
            - `json` option True (the default): the text is parsed as JSON.
              It must be a top-level object; every entry K/V becomes an entry
              in the result, with V always converted to a string.
            - `json` option False: the whole text becomes a single entry
              whose key is `options["env_var_name"]` if given, otherwise the
              secret's name.

        Binary payload:
            - a single entry, keyed the same way as above, whose value is
              the base64 encoding of the bytes (as a string).

        Every key is sanitized on a best-effort basis to look like an
        environment variable name; stricter validation is left to the
        calling @secrets decorator.

        :param secret_id: ARN or friendly name of the secret.
        :param options: Dictionary of additional options. E.g., `options={"env_var_name": custom_env_var_name}`.
        :param role: AWS IAM Role ARN to assume before reading the secret.
        :return: Dictionary of environment variables. All keys and values are strings.
        """
        import botocore
        from metaflow.plugins.aws.aws_client import get_aws_client

        # Region precedence: the ARN itself, then the `region` option, then
        # the configured default.
        # arn:aws:secretsmanager:::secret:SecretName-6RandomCharacters
        arn_match = re.match("arn:aws:secretsmanager:([^:]+):", secret_id)
        if arn_match:
            effective_aws_region = arn_match.group(1)
        else:
            effective_aws_region = options.get(
                "region", AWS_SECRETS_MANAGER_DEFAULT_REGION
            )
        # `effective_aws_region` may still be None at this point. That can be
        # fine when the environment provides a fallback region
        # (.aws/config, AWS_REGION, AWS_DEFAULT_REGION, ...).

        try:
            if AWS_SECRETS_MANAGER_DEFAULT_ROLE and not role:
                role = AWS_SECRETS_MANAGER_DEFAULT_ROLE
            secrets_manager_client = get_aws_client(
                "secretsmanager",
                client_params={"region_name": effective_aws_region},
                role_arn=role,
            )
        except botocore.exceptions.NoRegionError:
            # Give the user an actionable message. On Kubernetes or Argo
            # Workflows the traceback is still very long.
            # TODO: Find a way to show a concise error in logs
            raise MetaflowException(
                "Default region is not specified for AWS Secrets Manager. Please set METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION"
            )

        result = {}

        def add_entry(raw_key, value):
            # Sanitize the key, refusing collisions between sanitized names.
            env_key = _sanitize_key_as_env_var(raw_key)
            if env_key in result:
                raise MetaflowAWSSecretsManagerDuplicateKey(
                    "Duplicate key in secret: '%s' (sanitizes to '%s')"
                    % (raw_key, env_key)
                )
            result[env_key] = value

        def single_entry_key(secret_name):
            # Key for payloads that map to exactly one variable.
            return options.get("env_var_name") or secret_name

        # The AWS SDK may raise any of:
        #   SecretsManager.Client.exceptions.ResourceNotFoundException
        #   SecretsManager.Client.exceptions.InvalidParameterException
        #   SecretsManager.Client.exceptions.InvalidRequestException
        #   SecretsManager.Client.exceptions.DecryptionFailure
        #   SecretsManager.Client.exceptions.InternalServiceError
        # They are informative as they are, so they are not caught here.
        # 1/27/2023(jackie) - We will evolve this over time as we learn more.
        response = secrets_manager_client.get_secret_value(SecretId=secret_id)
        if "Name" not in response:
            raise MetaflowAWSSecretsManagerBadResponse(
                "Secret 'Name' is missing in response"
            )
        secret_name = response["Name"]

        if "SecretString" in response:
            secret_str = response["SecretString"]
            if not options.get("json", True):
                add_entry(single_entry_key(secret_name), secret_str)
                return result
            try:
                parsed = json.loads(secret_str)
            except JSONDecodeError:
                raise MetaflowAWSSecretsManagerJSONParseError(
                    "Secret string could not be parsed as JSON"
                )
            if type(parsed) is not dict:
                raise MetaflowAWSSecretsManagerNotJSONObject(
                    "Secret string is a JSON, but not an object (dict-like) - actual type %s."
                    % type(parsed)
                )
            for entry_key, entry_value in parsed.items():
                # Be lenient: always convert the value to a string.
                add_entry(entry_key, str(entry_value))
            return result

        if "SecretBinary" in response:
            # boto3 docs claim the payload is base64 encoded, but it is not;
            # see https://github.com/boto/boto3/issues/2735 - raw bytes are
            # returned. Environment variable values may not contain null
            # bytes, so the bytes are base64 encoded here and the trailing
            # decode turns the result into a UTF-8 string.
            encoded = base64.b64encode(response["SecretBinary"]).decode()
            add_entry(single_entry_key(secret_name), encoded)
            return result

        raise MetaflowAWSSecretsManagerBadResponse(
            "Secret response is missing both 'SecretString' and 'SecretBinary'"
        )
================================================
FILE: metaflow/plugins/aws/step_functions/__init__.py
================================================
================================================
FILE: metaflow/plugins/aws/step_functions/dynamo_db_client.py
================================================
import time
from metaflow.metaflow_config import SFN_DYNAMO_DB_TABLE
class DynamoDbClient(object):
    """
    Thin wrapper around the DynamoDB table (SFN_DYNAMO_DB_TABLE) that keeps
    foreach bookkeeping: the cardinality of a foreach split and the parent
    task ids needed by the matching join. Items are keyed by `pathspec`.
    """

    def __init__(self):
        from ..aws_client import get_aws_client

        self._client = get_aws_client("dynamodb")
        self.name = SFN_DYNAMO_DB_TABLE

    def save_foreach_cardinality(self, foreach_split_task_id, foreach_cardinality, ttl):
        """
        Write the item for a foreach split: the indices
        0..foreach_cardinality-1 as a number set, plus the given `ttl`.
        Returns the `put_item` response.
        """
        indices = [str(index) for index in range(foreach_cardinality)]
        item = {
            "pathspec": {"S": foreach_split_task_id},
            "for_each_cardinality": {"NS": indices},
            "ttl": {"N": str(ttl)},
        }
        return self._client.put_item(TableName=self.name, Item=item)

    def save_parent_task_id_for_foreach_join(
        self, foreach_split_task_id, foreach_join_parent_task_id
    ):
        """
        Add a parent task id to the string set stored for the split.

        The update is tried up to 10 times. Only
        ProvisionedThroughputExceededException is retried (after a sleep
        that doubles per attempt, capped at 60 seconds); any other client
        error is re-raised immediately. If every attempt is throttled the
        last error is raised.
        """
        last_error = None
        for attempt in range(10):
            try:
                return self._client.update_item(
                    TableName=self.name,
                    Key={"pathspec": {"S": foreach_split_task_id}},
                    UpdateExpression="ADD parent_task_ids_for_foreach_join :val",
                    ExpressionAttributeValues={
                        ":val": {"SS": [foreach_join_parent_task_id]}
                    },
                )
            except self._client.exceptions.ClientError as error:
                last_error = error
                throttled = (
                    error.response["Error"]["Code"]
                    == "ProvisionedThroughputExceededException"
                )
                if not throttled:
                    raise
                # Hopefully long enough for AWS to scale up; otherwise make
                # sure enough DynamoDB throughput is provisioned up front.
                time.sleep(min((2**attempt) * 10, 60))
        raise last_error

    def get_parent_task_ids_for_foreach_join(self, foreach_split_task_id):
        """Return the stored parent task ids for the split (consistent read)."""
        item = self._client.get_item(
            TableName=self.name,
            Key={"pathspec": {"S": foreach_split_task_id}},
            ProjectionExpression="parent_task_ids_for_foreach_join",
            ConsistentRead=True,
        )["Item"]
        return item["parent_task_ids_for_foreach_join"]["SS"]
================================================
FILE: metaflow/plugins/aws/step_functions/event_bridge_client.py
================================================
import base64
import json
from hashlib import sha1
from metaflow.util import to_bytes, to_unicode
class EventBridgeClient(object):
    """
    Builder-style helper that manages the EventBridge rule which triggers a
    state machine on a cron schedule.

    `cron`, `role_arn` and `state_machine_arn` each store their argument on
    the instance under the same name (replacing the method on that instance)
    and return self, so they can be chained before calling `schedule()`.
    """

    def __init__(self, name):
        from ..aws_client import get_aws_client

        self._client = get_aws_client("events")
        self.name = format(name)

    def cron(self, cron):
        self.cron = cron
        return self

    def role_arn(self, role_arn):
        self.role_arn = role_arn
        return self

    def state_machine_arn(self, state_machine_arn):
        self.state_machine_arn = state_machine_arn
        return self

    def schedule(self):
        """Create/update the rule when a cron is set, otherwise disable it; returns the rule name."""
        if self.cron:
            self._set()
        else:
            # No cron expression: reset the schedule.
            self._disable()
        return self.name

    def _disable(self):
        """Disable the rule; a rule that does not exist is silently ignored."""
        try:
            self._client.disable_rule(Name=self.name)
        except self._client.exceptions.ResourceNotFoundException:
            pass

    def _set(self):
        """Create or update the rule, then point it at the state machine."""
        rule_name = self.name
        # Generate a new rule or update existing rule.
        self._client.put_rule(
            Name=rule_name,
            ScheduleExpression="cron(%s)" % self.cron,
            Description="Metaflow generated rule for %s" % rule_name,
            State="ENABLED",
        )
        # Assign AWS Step Functions ARN to the rule as a target.
        target = {
            "Id": rule_name,
            "Arn": self.state_machine_arn,
            # Set input parameters to empty.
            "Input": json.dumps({"Parameters": json.dumps({})}),
            "RoleArn": self.role_arn,
        }
        self._client.put_targets(Rule=rule_name, Targets=[target])

    def delete(self):
        """
        Remove the rule's target and then the rule itself.

        Returns the `delete_rule` response, or None when the rule does not
        exist. Raises RuntimeError if the target could not be removed.
        """
        try:
            response = self._client.remove_targets(
                Rule=self.name,
                Ids=[self.name],
            )
            failed_entries = response.get("FailedEntryCount", 0)
            if failed_entries > 0:
                raise RuntimeError("Failed to remove targets from rule %s" % self.name)
            return self._client.delete_rule(Name=self.name)
        except self._client.exceptions.ResourceNotFoundException:
            # Ignore if the rule does not exist.
            return None
def format(name):
    """
    Return an EventBridge-compatible rule name.

    AWS Event Bridge limits rule names to 64 characters. Names within the
    limit are returned unchanged. Longer names are cut to their first 47
    characters and suffixed with '-' plus the first 16 characters of the
    lower-cased base32 SHA-1 digest of the full name, which keeps the
    result unique per original name and exactly 64 characters long.
    """
    if len(name) <= 64:
        return name
    digest = sha1(to_bytes(name)).digest()
    name_hash = to_unicode(base64.b32encode(digest))[:16].lower()
    return "%s-%s" % (name[:47], name_hash)
================================================
FILE: metaflow/plugins/aws/step_functions/production_token.py
================================================
import json
import os
import random
import string
import zlib
from itertools import dropwhile
from metaflow.util import to_bytes
def _token_generator(token_prefix):
    """
    Yield the deterministic sequence of candidate production tokens for
    `token_prefix`.

    The i-th token (i = 0..9999) is "<token_prefix>-<i>-" followed by four
    distinct lowercase letters. The letters are drawn from a generator
    seeded with the adler32 checksum of that prefix, so the same prefix
    always produces the same sequence, across processes and runs.
    """
    for i in range(10000):
        prefix = "%s-%d-" % (token_prefix, i)
        # we need to use a consistent hash here, which is why
        # seeding with prefix or hash(prefix) won't work.
        # A private Random instance is used instead of random.seed() so that
        # generating tokens does not reset the process-wide generator; it
        # produces the same letters as seeding the global generator would.
        rng = random.Random(zlib.adler32(to_bytes(prefix)))
        yield prefix + "".join(rng.sample(string.ascii_lowercase, 4))
def _makedirs(path):
# this is for python2 compatibility.
# Python3 has os.makedirs(exist_ok=True).
try:
os.makedirs(path)
except OSError as x:
if x.errno == 17:
return
else:
raise
def _load_config(path):
if os.path.exists(path):
with open(path) as f:
return json.load(f)
else:
return {}
def _path(token_prefix):
# TODO make this a MF config variable
if os.environ.get("METAFLOW_TOKEN_HOME"):
home = os.environ.get("METAFLOW_TOKEN_HOME")
else:
home = os.environ.get("METAFLOW_HOME", "~/.metaflowconfig")
return os.path.expanduser("%s/%s" % (home, token_prefix))
def new_token(token_prefix, prev_token=None):
    """
    Return the next production token for `token_prefix`.

    Parameters
    ----------
    token_prefix : str
        Prefix that identifies the token sequence.
    prev_token : str, optional
        The token currently in use. When None, the first token of the
        sequence is returned.

    Returns
    -------
    str or None
        The token following `prev_token` in the sequence, or None when
        `prev_token` is not part of the sequence or is its last element.
    """
    tokens = _token_generator(token_prefix)
    if prev_token is None:
        return next(tokens, None)
    remaining = dropwhile(lambda x: x != prev_token, tokens)
    # Consume prev_token itself (if it was found at all) ...
    next(remaining, None)
    # ... and hand out its successor. The default keeps an exhausted
    # sequence from leaking StopIteration to the caller.
    return next(remaining, None)
def load_token(token_prefix):
    """Return the stored production token for `token_prefix`, or None if there is none."""
    return _load_config(_path(token_prefix)).get("production_token")
def store_token(token_prefix, token):
    """
    Persist `token` as the `production_token` for `token_prefix`.

    The existing configuration at the token path is loaded, updated, and
    written back as JSON; the parent directory is created first if needed.
    """
    config_path = _path(token_prefix)
    settings = _load_config(config_path)
    settings["production_token"] = token
    _makedirs(os.path.dirname(config_path))
    with open(config_path, "w") as handle:
        json.dump(settings, handle)
================================================
FILE: metaflow/plugins/aws/step_functions/schedule_decorator.py
================================================
from metaflow.decorators import FlowDecorator
# TODO (savin): Lift this decorator up since it's also used by Argo now
class ScheduleDecorator(FlowDecorator):
    """
    Specifies the times when the flow should be run when running on a
    production scheduler.

    Parameters
    ----------
    hourly : bool, default False
        Run the workflow hourly.
    daily : bool, default True
        Run the workflow daily.
    weekly : bool, default False
        Run the workflow weekly.
    cron : str, optional, default None
        Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
        specified by this expression.
    timezone : str, optional, default None
        Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
        which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
    """

    name = "schedule"
    defaults = {
        "cron": None,
        "weekly": False,
        "daily": True,
        "hourly": False,
        "timezone": None,
    }

    def flow_init(
        self, flow, graph, environment, flow_datastore, metadata, logger, echo, options
    ):
        """
        Resolve the decorator attributes into `self.schedule` and `self.timezone`.

        An explicit `cron` expression wins; otherwise the first enabled preset
        among weekly, hourly and daily (checked in that order) is used, and
        `self.schedule` is None when none of them is enabled.
        """
        # Currently supports quartz cron expressions in UTC as defined in
        # https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions
        presets = (
            ("weekly", "0 0 ? * SUN *"),
            ("hourly", "0 * * * ? *"),
            ("daily", "0 0 * * ? *"),
        )
        custom = self.attributes["cron"]
        if custom:
            self.schedule = custom
        else:
            self.schedule = next(
                (
                    expression
                    for attribute, expression in presets
                    if self.attributes[attribute]
                ),
                None,
            )

        # Argo Workflows supports the IANA timezone standard, e.g. America/Los_Angeles
        self.timezone = self.attributes["timezone"]
================================================
FILE: metaflow/plugins/aws/step_functions/set_batch_environment.py
================================================
import json
import os
import sys
from .dynamo_db_client import DynamoDbClient
def export_parameters(output_file):
    """
    Write shell `export` lines for the flow parameters to `output_file`.

    Defaults are read from the `METAFLOW_DEFAULT_PARAMETERS` environment
    variable and overridden by the values in `METAFLOW_PARAMETERS` (both are
    JSON objects, defaulting to `{}`). Each parameter becomes one
    `export METAFLOW_INIT_<NAME>=<json value>` line, and the file mode is set
    to 0o775 afterwards.
    """
    overrides = json.loads(os.environ.get("METAFLOW_PARAMETERS", "{}"))
    merged = json.loads(os.environ.get("METAFLOW_DEFAULT_PARAMETERS", "{}"))
    merged.update(overrides)
    with open(output_file, "w") as handle:
        for key, value in merged.items():
            # Replace `-` with `_` in parameter names since `-` isn't an
            # allowed character for environment variables. cli.py will
            # correctly translate the replaced `-`s.
            variable = key.upper().replace("-", "_")
            handle.write(
                "export METAFLOW_INIT_%s=%s\n" % (variable, json.dumps(value))
            )
    # 0o775 is the octal spelling of decimal 509 (rwxrwxr-x).
    os.chmod(output_file, 0o775)
def export_parent_task_ids(output_file):
    """
    Write an `export METAFLOW_PARENT_TASK_IDS=...` line to `output_file`.

    The ids are looked up through `DynamoDbClient` for the split parent task
    id found in the `METAFLOW_SPLIT_PARENT_TASK_ID` environment variable and
    are joined with commas. The file mode is set to 0o775 afterwards.
    """
    split_parent_task_id = os.environ["METAFLOW_SPLIT_PARENT_TASK_ID"]
    parent_ids = DynamoDbClient().get_parent_task_ids_for_foreach_join(
        split_parent_task_id
    )
    with open(output_file, "w") as handle:
        handle.write("export METAFLOW_PARENT_TASK_IDS=%s" % ",".join(parent_ids))
    # 0o775 is the octal spelling of decimal 509 (rwxrwxr-x).
    os.chmod(output_file, 0o775)
# TODO: Maybe use click someday instead of conditional.
if __name__ == "__main__":
    # Invoked as `<module> <command> <output_file>`; commands other than the
    # two listed below are silently ignored.
    handlers = {
        "parameters": export_parameters,
        "parent_tasks": export_parent_task_ids,
    }
    handler = handlers.get(sys.argv[1])
    if handler is not None:
        handler(sys.argv[2])
================================================
FILE: metaflow/plugins/aws/step_functions/step_functions.py
================================================
import hashlib
import json
import os
import random
import string
import sys
from collections import defaultdict
from metaflow import R
from metaflow.decorators import flow_decorators
from metaflow.exception import MetaflowException
from metaflow.metaflow_config import (
EVENTS_SFN_ACCESS_IAM_ROLE,
S3_ENDPOINT_URL,
SFN_DYNAMO_DB_TABLE,
SFN_EXECUTION_LOG_GROUP_ARN,
SFN_IAM_ROLE,
SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH,
)
from metaflow.parameters import deploy_time_eval
from metaflow.user_configs.config_options import ConfigInput
from metaflow.util import dict_to_cli_options, to_pascalcase
from ..batch.batch import Batch
from .event_bridge_client import EventBridgeClient
from .step_functions_client import StepFunctionsClient
# Error raised by the AWS Step Functions integration in this module, e.g. when
# compiling, deploying, triggering, listing or inspecting a workflow fails.
class StepFunctionsException(MetaflowException):
headline = "AWS Step Functions error"
# Error raised when scheduling a deployed workflow through AWS EventBridge
# fails (see `StepFunctions.schedule`).
class StepFunctionsSchedulingException(MetaflowException):
headline = "AWS Step Functions scheduling error"
class StepFunctions(object):
def __init__(
self,
name,
graph,
flow,
code_package_metadata,
code_package_sha,
code_package_url,
production_token,
metadata,
flow_datastore,
environment,
event_logger,
monitor,
tags=None,
aws_batch_tags=None,
namespace=None,
username=None,
max_workers=None,
workflow_timeout=None,
is_project=False,
use_distributed_map=False,
compress_state_machine=False,
):
# Record the deployment settings on the instance, then eagerly compile the
# flow graph into a state machine definition (`self._workflow`) and resolve
# the cron schedule (`self._cron`). Nothing is pushed to AWS here; see
# `deploy` and `schedule`.
# NOTE: `is_project` is accepted but not stored or used in this constructor.
self.name = name
self.graph = graph
self.flow = flow
self.code_package_metadata = code_package_metadata
self.code_package_sha = code_package_sha
self.code_package_url = code_package_url
self.production_token = production_token
self.metadata = metadata
self.flow_datastore = flow_datastore
self.environment = environment
self.event_logger = event_logger
self.monitor = monitor
self.tags = tags
# Normalize a missing tag mapping to an empty dict so it can be merged later.
self.aws_batch_tags = aws_batch_tags or {}
self.namespace = namespace
self.username = username
self.max_workers = max_workers
self.workflow_timeout = workflow_timeout
# Needs `self.flow`; must be set before `_compile()` since `_batch` reads it.
self.config_parameters = self._process_config_parameters()
# https://aws.amazon.com/blogs/aws/step-functions-distributed-map-a-serverless-solution-for-large-scale-parallel-data-processing/
self.use_distributed_map = use_distributed_map
# S3 command upload configuration
self.compress_state_machine = compress_state_machine
self._client = StepFunctionsClient()
# Compilation reads most of the attributes assigned above, so it runs last.
self._workflow = self._compile()
# NOTE: this rebinds `_cron` on the instance from the bound method to its
# return value. `_compile()` reaches `_process_parameters()`, which still
# calls `self._cron()`, so the compile step above must stay before this line.
self._cron = self._cron()
# Populated by `deploy()`.
self._state_machine_arn = None
def to_json(self):
    """
    Return the compiled workflow serialized by its own `to_json(pretty=True)`.
    """
    compiled = self._workflow
    return compiled.to_json(pretty=True)
def trigger_explanation(self):
    """
    Return a human-readable sentence describing how this workflow is triggered.

    With a cron schedule the message names `self.event_bridge_rule`;
    otherwise it states that the workflow has to be launched manually.
    """
    if not self._cron:
        return "No triggers defined. You need to launch this workflow manually."
    # Sometime in the future, we should vendor (or write) a utility
    # that can translate cron specifications into a human-readable
    # format and push to the user for a better UX, someday.
    return (
        "This workflow triggers automatically via a cron schedule *%s* "
        "defined in AWS EventBridge." % self.event_bridge_rule
    )
def deploy(self, log_execution_history):
    """
    Push the compiled state machine to AWS Step Functions.

    Parameters
    ----------
    log_execution_history : bool
        Forwarded to the client's `push`; when truthy, a CloudWatch Logs log
        group ARN must be configured.

    Raises
    ------
    StepFunctionsException
        If `SFN_IAM_ROLE` is not configured, if execution logging is requested
        without `SFN_EXECUTION_LOG_GROUP_ARN`, or if the push itself fails.
    """
    if SFN_IAM_ROLE is None:
        raise StepFunctionsException(
            "No IAM role found for AWS Step Functions. You can create one "
            "following the instructions listed at "
            "*https://docs.outerbounds.com/engineering/deployment/aws-managed/cloudformation/* "
            "and re-configure Metaflow using *metaflow configure aws* on your "
            "terminal."
        )
    if log_execution_history and SFN_EXECUTION_LOG_GROUP_ARN is None:
        raise StepFunctionsException(
            "No AWS CloudWatch Logs log group ARN found for emitting state "
            "machine execution logs for your workflow. You can set it in your "
            "environment by using the METAFLOW_SFN_EXECUTION_LOG_GROUP_ARN "
            "environment variable."
        )
    try:
        # The resulting ARN is remembered for `schedule()`.
        self._state_machine_arn = self._client.push(
            name=self.name,
            definition=self.to_json(),
            role_arn=SFN_IAM_ROLE,
            log_execution_history=log_execution_history,
        )
    except Exception as err:
        raise StepFunctionsException(repr(err))
def schedule(self):
    """
    Create the AWS EventBridge rule for this workflow.

    The rule is built from the resolved cron expression, the EventBridge IAM
    role and the state machine ARN recorded by `deploy()`; the value returned
    by the client's `schedule()` is stored on `self.event_bridge_rule`.

    Raises
    ------
    StepFunctionsSchedulingException
        If `EVENTS_SFN_ACCESS_IAM_ROLE` is not configured or the EventBridge
        call fails.
    """
    # Scheduling is currently enabled via AWS Event Bridge.
    if EVENTS_SFN_ACCESS_IAM_ROLE is None:
        raise StepFunctionsSchedulingException(
            "No IAM role found for AWS Events Bridge. You can create one "
            "following the instructions listed at "
            "*https://docs.outerbounds.com/engineering/deployment/aws-managed/cloudformation/* "
            "and re-configure Metaflow using *metaflow configure aws* on your "
            "terminal."
        )
    try:
        rule_builder = EventBridgeClient(self.name)
        rule_builder = rule_builder.cron(self._cron)
        rule_builder = rule_builder.role_arn(EVENTS_SFN_ACCESS_IAM_ROLE)
        rule_builder = rule_builder.state_machine_arn(self._state_machine_arn)
        self.event_bridge_rule = rule_builder.schedule()
    except Exception as err:
        raise StepFunctionsSchedulingException(repr(err))
@classmethod
def delete(cls, name):
    """
    Delete the EventBridge rule and the state machine named `name`.

    Returns
    -------
    tuple
        `(schedule_deleted, sfn_deleted)` as returned by the two clients.

    Raises
    ------
    StepFunctionsException
        If the Step Functions client reports (by returning None) that the
        workflow does not exist. The EventBridge deletion has already been
        attempted by then.
    """
    # Always attempt to delete the event bridge rule.
    rule_result = EventBridgeClient(name).delete()
    state_machine_result = StepFunctionsClient().delete(name)
    if state_machine_result is not None:
        return rule_result, state_machine_result
    raise StepFunctionsException(
        "The workflow *%s* doesn't exist on AWS Step Functions." % name
    )
@classmethod
def terminate(cls, flow_name, name):
    """
    Terminate the running execution `name` of the state machine `flow_name`.

    Returns
    -------
    The response of the client's `terminate_execution` call.

    Raises
    ------
    StepFunctionsException
        If the state machine does not exist or cannot be inspected (raised by
        `get_execution`), or if no running execution named `name` is found.
    """
    client = StepFunctionsClient()
    execution = cls.get_execution(flow_name, name)
    # `get_execution` returns None when no RUNNING execution matches; fail
    # with a descriptive error instead of a TypeError from tuple unpacking.
    if execution is None:
        raise StepFunctionsException(
            "No running execution *%s* found for the state machine *%s* "
            "on AWS Step Functions." % (name, flow_name)
        )
    execution_arn, _, _, _ = execution
    return client.terminate_execution(execution_arn)
@classmethod
def trigger(cls, name, parameters):
    """
    Start an execution of the deployed workflow `name`.

    `parameters` is JSON-encoded and wrapped as the string value of a
    `Parameters` key in the execution input.

    Raises
    ------
    StepFunctionsException
        If the workflow cannot be fetched or does not exist, if the encoded
        input is longer than 20480 characters, or if the trigger call fails.
    """
    try:
        state_machine = StepFunctionsClient().get(name)
    except Exception as err:
        raise StepFunctionsException(repr(err))
    if state_machine is None:
        raise StepFunctionsException(
            "The workflow *%s* doesn't exist on AWS Step Functions. "
            "Please deploy your flow first." % name
        )
    # Dump parameters into `Parameters` input field.
    payload = json.dumps({"Parameters": json.dumps(parameters)})
    # AWS Step Functions limits input to be 32KiB, but AWS Batch
    # has its own limitation of 30KiB for job specification length.
    # Reserving 10KiB for rest of the job specification leaves 20KiB
    # for us, which should be enough for most use cases for now.
    max_payload_length = 20480
    if len(payload) > max_payload_length:
        raise StepFunctionsException(
            "Length of parameter names and values shouldn't exceed 20480 "
            "as imposed by AWS Step Functions."
        )
    try:
        arn = state_machine.get("stateMachineArn")
        return StepFunctionsClient().trigger(arn, payload)
    except Exception as err:
        raise StepFunctionsException(repr(err))
@classmethod
def list(cls, name, states):
    """
    Return the executions of the workflow `name`, filtered by `states`.

    The result is whatever the client's `list_executions` returns for the
    workflow's state machine ARN.

    Raises
    ------
    StepFunctionsException
        If the workflow cannot be fetched, does not exist, or listing fails.
    """
    try:
        state_machine = StepFunctionsClient().get(name)
    except Exception as err:
        raise StepFunctionsException(repr(err))
    if state_machine is None:
        raise StepFunctionsException(
            "The workflow *%s* doesn't exist on AWS Step Functions." % name
        )
    try:
        arn = state_machine.get("stateMachineArn")
        return StepFunctionsClient().list_executions(arn, states)
    except Exception as err:
        raise StepFunctionsException(repr(err))
@classmethod
def get_existing_deployment(cls, name):
    """
    Look up the owner and production token of an already deployed workflow.

    Returns
    -------
    tuple or None
        `(owner, production_token)` read from the `Parameters` of the `start`
        state's `Parameters`, or None when no workflow named `name` exists.

    Raises
    ------
    StepFunctionsException
        If the existing definition lacks the expected keys, i.e. it was not
        produced by Metaflow.
    """
    workflow = StepFunctionsClient().get(name)
    if workflow is None:
        return None
    try:
        definition = json.loads(workflow["definition"])
        start_state = definition["States"]["start"]
        batch_parameters = start_state["Parameters"]["Parameters"]
        owner = batch_parameters.get("metaflow.owner")
        token = batch_parameters.get("metaflow.production_token")
        return owner, token
    except KeyError:
        raise StepFunctionsException(
            "An existing non-metaflow workflow with the same name as *%s* "
            "already exists in AWS Step Functions. Please modify the name of "
            "this flow or delete your existing workflow on AWS Step "
            "Functions." % name
        )
@classmethod
def get_execution(cls, state_machine_name, name):
    """
    Find the running execution `name` of the state machine `state_machine_name`.

    Returns
    -------
    tuple or None
        `(execution_arn, owner, production_token, state_machine)` where the
        last three values are read from the `start` state's container
        environment (`METAFLOW_OWNER`, `METAFLOW_PRODUCTION_TOKEN`,
        `SFN_STATE_MACHINE`), or None when no RUNNING execution has that name.

    Raises
    ------
    StepFunctionsException
        If the state machine cannot be fetched, does not exist, or its
        definition or executions cannot be inspected.
    """
    client = StepFunctionsClient()
    try:
        state_machine = client.get(state_machine_name)
    except Exception as e:
        raise StepFunctionsException(repr(e))
    if state_machine is None:
        raise StepFunctionsException(
            "The state machine *%s* doesn't exist on AWS Step Functions."
            % state_machine_name
        )
    try:
        state_machine_arn = state_machine.get("stateMachineArn")
        # A definition without this structure makes one of the chained
        # `.get()` calls fail on None; that is reported by the handler below.
        environment_vars = (
            json.loads(state_machine.get("definition"))
            .get("States")
            .get("start")
            .get("Parameters")
            .get("ContainerOverrides")
            .get("Environment")
        )
        parameters = {
            item.get("Name"): item.get("Value") for item in environment_vars
        }
        executions = client.list_executions(state_machine_arn, states=["RUNNING"])
        for execution in executions:
            if execution.get("name") == name:
                # Only `.get()` lookups happen here, so no KeyError handler is
                # needed; any failure is wrapped by the enclosing handler.
                return (
                    execution.get("executionArn"),
                    parameters.get("METAFLOW_OWNER"),
                    parameters.get("METAFLOW_PRODUCTION_TOKEN"),
                    parameters.get("SFN_STATE_MACHINE"),
                )
        return None
    except Exception as e:
        raise StepFunctionsException(repr(e))
def _compile(self):
# Compile the flow graph into a `Workflow` (state machine definition).
# Flows using @trigger, @trigger_on_finish or @exit_hook are rejected up
# front; @parallel steps, switch statements and unknown node types are
# rejected while visiting the graph. Returns the `Workflow` that results from
# visiting the graph starting at the `start` step.
if self.flow._flow_decorators.get("trigger") or self.flow._flow_decorators.get(
"trigger_on_finish"
):
raise StepFunctionsException(
"Deploying flows with @trigger or @trigger_on_finish decorator(s) "
"to AWS Step Functions is not supported currently."
)
if self.flow._flow_decorators.get("exit_hook"):
raise StepFunctionsException(
"Deploying flows with the @exit_hook decorator "
"to AWS Step Functions is not currently supported."
)
# Visit every node of the flow and recursively build the state machine.
# `workflow` is the (sub)workflow that receives the states and is returned;
# `exit_node` is the name of the join step closing the enclosing split or
# foreach (None at the top level) - a node transitioning into it ends the
# sub workflow.
def _visit(node, workflow, exit_node=None):
if node.parallel_foreach:
raise StepFunctionsException(
"Deploying flows with @parallel decorator(s) "
"to AWS Step Functions is not supported currently."
)
if node.type == "split-switch":
raise StepFunctionsException(
"Deploying flows with switch statement "
"to AWS Step Functions is not supported currently."
)
# Assign an AWS Batch job to the AWS Step Functions state
# and pass the intermediate state by exposing `JobId` and
# `Parameters` to the child job(s) as outputs. `Index` and
# `SplitParentTaskId` are populated optionally, when available.
# We can't modify the names of keys in AWS Step Functions aside
# from a blessed few which are set as `Parameters` for the Map
# state. That's why even though `JobId` refers to the parent task
# id, we can't call it as such. Similar situation for `Parameters`.
state = (
State(node.name)
.batch(self._batch(node))
.output_path(
"$.['JobId', " "'Parameters', " "'Index', " "'SplitParentTaskId']"
)
)
# End the (sub)workflow if we have reached the end of the flow or
# the parent step of matching_join of the sub workflow.
if node.type == "end" or exit_node in node.out_funcs:
workflow.add_state(state.end())
# Continue linear assignment within the (sub)workflow if the node
# doesn't branch or fork.
elif node.type in ("start", "linear", "join"):
workflow.add_state(state.next(node.out_funcs[0]))
_visit(self.graph[node.out_funcs[0]], workflow, exit_node)
# Create a `Parallel` state and assign sub workflows if the node
# branches out.
elif node.type == "split":
# The Parallel state is named after the SHA-224 hex digest of the
# `&`-joined names of the branch steps.
branch_name = hashlib.sha224(
"&".join(node.out_funcs).encode("utf-8")
).hexdigest()
workflow.add_state(state.next(branch_name))
branch = Parallel(branch_name).next(node.matching_join)
# Generate as many sub workflows as branches and recurse.
for n in node.out_funcs:
branch.branch(
_visit(
self.graph[n], Workflow(n).start_at(n), node.matching_join
)
)
workflow.add_state(branch)
# Continue the traversal from the matching_join.
_visit(self.graph[node.matching_join], workflow, exit_node)
# Create a `Map` state and assign sub workflow if the node forks.
elif node.type == "foreach":
# Fetch runtime cardinality via an AWS DynamoDb Get call before
# configuring the node
# The helper states are named after the first child step with a `#`
# (cardinality lookup) or `*` (Map iterator) prefix.
cardinality_state_name = "#%s" % node.out_funcs[0]
workflow.add_state(state.next(cardinality_state_name))
cardinality_state = (
State(cardinality_state_name)
.dynamo_db(SFN_DYNAMO_DB_TABLE, "$.JobId", "for_each_cardinality")
.result_path("$.Result")
)
iterator_name = "*%s" % node.out_funcs[0]
workflow.add_state(cardinality_state.next(iterator_name))
workflow.add_state(
Map(iterator_name)
.items_path("$.Result.Item.for_each_cardinality.NS")
.parameter("JobId.$", "$.JobId")
.parameter("SplitParentTaskId.$", "$.JobId")
.parameter("Parameters.$", "$.Parameters")
.parameter("Index.$", "$$.Map.Item.Value")
.next(
"%s_*GetManifest" % iterator_name
if self.use_distributed_map
else node.matching_join
)
.iterator(
_visit(
self.graph[node.out_funcs[0]],
Workflow(node.out_funcs[0])
.start_at(node.out_funcs[0])
.mode(
"DISTRIBUTED" if self.use_distributed_map else "INLINE"
),
node.matching_join,
)
)
.max_concurrency(self.max_workers)
# AWS Step Functions has a shortcoming for DistributedMap at the
# moment that does not allow us to subset the output of for-each
# to just a single element. We have to rely on a rather terrible
# hack and resort to using ResultWriter to write the state to
# Amazon S3 and process it in another task. But, well what can we
# do...
# The two arguments are SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH (with any
# leading `s3://` removed) split at the first `/`, padded with "" when
# there is no `/` - presumably (bucket, prefix), confirm against
# `Map.result_writer`. Without distributed map, (None, None) is passed.
.result_writer(
*(
(
(
SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH[len("s3://") :]
if SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH.startswith(
"s3://"
)
else SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH
).split("/", 1)
+ [""]
)[:2]
if self.use_distributed_map
else (None, None)
)
)
.output_path("$" if self.use_distributed_map else "$.[0]")
)
if self.use_distributed_map:
# Read the manifest written by the ResultWriter from S3 ...
workflow.add_state(
State("%s_*GetManifest" % iterator_name)
.resource("arn:aws:states:::aws-sdk:s3:getObject")
.parameter("Bucket.$", "$.ResultWriterDetails.Bucket")
.parameter("Key.$", "$.ResultWriterDetails.Key")
.next("%s_*Map" % iterator_name)
.result_selector("Body.$", "States.StringToJson($.Body)")
)
# ... then run a second Map over the first SUCCEEDED result file,
# whose Pass state parses each item's `Output`; only the first
# element (`$.[0]`) is forwarded to the join.
workflow.add_state(
Map("%s_*Map" % iterator_name)
.iterator(
Workflow("%s_*PassWorkflow" % iterator_name)
.mode("DISTRIBUTED")
.start_at("%s_*Pass" % iterator_name)
.add_state(
Pass("%s_*Pass" % iterator_name)
.end()
.parameter("Output.$", "States.StringToJson($.Output)")
.output_path("$.Output")
)
)
.next(node.matching_join)
.max_concurrency(1000)
.item_reader(
JSONItemReader()
.resource("arn:aws:states:::s3:getObject")
.parameter("Bucket.$", "$.Body.DestinationBucket")
.parameter("Key.$", "$.Body.ResultFiles.SUCCEEDED[0].Key")
)
.output_path("$.[0]")
)
# Continue the traversal from the matching_join.
_visit(self.graph[node.matching_join], workflow, exit_node)
# We shouldn't ideally ever get here.
else:
raise StepFunctionsException(
"Node type *%s* for step *%s* "
"is not currently supported by "
"AWS Step Functions." % (node.type, node.name)
)
return workflow
workflow = Workflow(self.name).start_at("start")
# A workflow-level timeout is only set when one was configured.
if self.workflow_timeout:
workflow.timeout_seconds(self.workflow_timeout)
return _visit(self.graph["start"], workflow)
def _cron(self):
schedule = self.flow._flow_decorators.get("schedule")
if schedule:
schedule = schedule[0]
if schedule.timezone is not None:
raise StepFunctionsException(
"Step Functions does not support scheduling with a timezone."
)
return schedule.schedule
return None
def _process_parameters(self):
    """
    Collect the flow's non-config parameters with their deploy-time defaults.

    Returns
    -------
    list of dict
        One `{"name": ..., "value": ...}` entry per parameter, where `value`
        is the result of `deploy_time_eval` on the parameter's default.

    Raises
    ------
    MetaflowException
        If two parameters share a name (case-insensitively), or if the flow is
        scheduled and a required parameter has no default.
    """
    has_schedule = self._cron() is not None
    collected = []
    seen_names = set()
    for _, param in self.flow._get_parameters():
        # Throw an exception if the parameter is specified twice.
        lowered = param.name.lower()
        if lowered in seen_names:
            raise MetaflowException(
                "Parameter *%s* is specified twice. Note that parameter names "
                "are case-insensitive." % param.name
            )
        seen_names.add(lowered)
        # NOTE: We skip config parameters as these do not have dynamic values,
        # and need to be treated differently.
        if param.IS_CONFIG_PARAMETER:
            continue
        required = param.kwargs.get("required", False)
        lacks_default = "default" not in param.kwargs
        # Throw an exception if a schedule is set for a flow with required
        # parameters with no defaults. We currently don't have any notion
        # of data triggers in AWS Event Bridge.
        if lacks_default and required and has_schedule:
            raise MetaflowException(
                "The parameter *%s* does not have a default and is required. "
                "Scheduling such parameters via AWS Event Bridge is not "
                "currently supported." % param.name
            )
        default_value = deploy_time_eval(param.kwargs.get("default"))
        collected.append({"name": param.name, "value": default_value})
    return collected
def _process_config_parameters(self):
    """
    Collect the flow's config parameters.

    Returns
    -------
    list of dict
        One `{"name": ..., "kv_name": ...}` entry per config parameter, where
        `kv_name` comes from `ConfigInput.make_key_name`.

    Raises
    ------
    MetaflowException
        If two config parameters share a name (case-insensitively).
    """
    collected = []
    seen_names = set()
    for _, param in self.flow._get_parameters():
        # Only config parameters are of interest here.
        if not param.IS_CONFIG_PARAMETER:
            continue
        # Throw an exception if the parameter is specified twice.
        lowered = param.name.lower()
        if lowered in seen_names:
            raise MetaflowException(
                "Parameter *%s* is specified twice. Note that parameter names "
                "are case-insensitive." % param.name
            )
        seen_names.add(lowered)
        collected.append(
            {"name": param.name, "kv_name": ConfigInput.make_key_name(param.name)}
        )
    return collected
def _batch(self, node):
# Build the AWS Batch job for the step `node`.
# Two dictionaries are assembled: `attrs` (handed to the job as `attrs`,
# also used to carry AWS Step Functions state between steps) and `env` (the
# container environment). Values starting with `$.` / `$$.` are JsonPath
# references resolved by AWS Step Functions at runtime. The result is the
# object returned by `Batch(...).create_job(...).attempts(total_retries + 1)`.
# Raises StepFunctionsException for @parallel steps and when a foreach needs
# the DynamoDB table but `SFN_DYNAMO_DB_TABLE` is not configured.
attrs = {
# metaflow.user is only used for setting the AWS Job Name.
# Since job executions are no longer tied to a specific user
# identity, we will just set their user to `SFN`. We still do need
# access to the owner of the workflow for production tokens, which
# we can stash in metaflow.owner.
"metaflow.user": "SFN",
"metaflow.owner": self.username,
"metaflow.flow_name": self.flow.name,
"metaflow.step_name": node.name,
# Unfortunately we can't set the task id here since AWS Step
# Functions lacks any notion of run-scoped task identifiers. We
# instead co-opt the AWS Batch job id as the task id. This also
# means that the AWS Batch job name will have missing fields since
# the job id is determined at job execution, but since the job id is
# part of the job description payload, we don't lose much except for
# a few ugly looking blank fields in the AWS Batch UI.
# Also, unfortunately we can't set the retry count since
# `$$.State.RetryCount` resolves to an int dynamically and
# AWS Batch job specification only accepts strings. We handle
# retries/catch within AWS Batch to get around this limitation.
# And, we also cannot set the run id here since the run id maps to
# the execution name of the AWS Step Functions State Machine, which
# is different when executing inside a distributed map. We set it once
# in the start step and move it along to be consumed by all the children.
"metaflow.version": self.environment.get_environment_info()[
"metaflow_version"
],
# We rely on step names and task ids of parent steps to construct
# input paths for a task. Since the only information we can pass
# between states (via `InputPath` and `ResultPath`) in AWS Step
# Functions is the job description, we run the risk of exceeding
# 32K state size limit rather quickly if we don't filter the job
# description to a minimal set of fields. Unfortunately, the partial
# `JsonPath` implementation within AWS Step Functions makes this
# work a little non-trivial; it doesn't like dots in keys, so we
# have to add the field again.
# This pattern is repeated in a lot of other places, where we use
# AWS Batch parameters to store AWS Step Functions state
# information, since this field is the only field in the AWS Batch
# specification that allows us to set key-values.
"step_name": node.name,
}
# Store production token within the `start` step, so that subsequent
# `step-functions create` calls can perform a rudimentary authorization
# check.
if node.name == "start":
attrs["metaflow.production_token"] = self.production_token
# Add env vars from the optional @environment decorator.
env_deco = [deco for deco in node.decorators if deco.name == "environment"]
env = {}
if env_deco:
# Copy so that the decorator's own `vars` mapping is not mutated below.
env = env_deco[0].attributes["vars"].copy()
# add METAFLOW_S3_ENDPOINT_URL
if S3_ENDPOINT_URL is not None:
env["METAFLOW_S3_ENDPOINT_URL"] = S3_ENDPOINT_URL
if node.name == "start":
# metaflow.run_id maps to AWS Step Functions State Machine Execution in all
# cases except for when within a for-each construct that relies on
# Distributed Map. To work around this issue, we pass the run id from the
# start step to all subsequent tasks.
attrs["metaflow.run_id.$"] = "$$.Execution.Name"
# Initialize parameters for the flow in the `start` step.
parameters = self._process_parameters()
if parameters:
# Get user-defined parameters from State Machine Input.
# Since AWS Step Functions doesn't allow for optional inputs
# currently, we have to unfortunately place an artificial
# constraint that every parameterized workflow needs to include
# `Parameters` as a key in the input to the workflow.
# `step-functions trigger` already takes care of this
# requirement, but within the UI, the users will be required to
# specify an input with key as `Parameters` and value as a
# stringified json of the actual parameters -
# {"Parameters": "{\"alpha\": \"beta\"}"}
env["METAFLOW_PARAMETERS"] = "$.Parameters"
default_parameters = {}
for parameter in parameters:
# Parameters whose default evaluated to None are left out.
if parameter["value"] is not None:
default_parameters[parameter["name"]] = parameter["value"]
# Dump the default values specified in the flow.
env["METAFLOW_DEFAULT_PARAMETERS"] = json.dumps(default_parameters)
# `start` step has no upstream input dependencies aside from
# parameters.
input_paths = None
else:
# We need to rely on the `InputPath` of the AWS Step Functions
# specification to grab task ids and the step names of the parent
# to properly construct input_paths at runtime. Thanks to the
# JsonPath-foo embedded in the parent states, we have this
# information easily available.
if node.parallel_foreach:
raise StepFunctionsException(
"Parallel steps are not supported yet with AWS step functions."
)
# Handle foreach join.
if (
node.type == "join"
and self.graph[node.split_parents[-1]].type == "foreach"
):
input_paths = (
"sfn-${METAFLOW_RUN_ID}/%s/:"
"${METAFLOW_PARENT_TASK_IDS}" % node.in_funcs[0]
)
# Unfortunately, AWS Batch only allows strings as value types
# in its specification, and we don't have any way to concatenate
# the task ids array from the parent steps within AWS Step
# Functions and pass it down to AWS Batch. We instead have to
# rely on publishing the state to DynamoDb and fetching it back
# in within the AWS Batch entry point to set
# `METAFLOW_PARENT_TASK_IDS`. The state is scoped to the parent
# foreach task `METAFLOW_SPLIT_PARENT_TASK_ID`. We decided on
# AWS DynamoDb and not AWS Lambdas, because deploying and
# debugging Lambdas would be a nightmare as far as OSS support
# is concerned.
env["METAFLOW_SPLIT_PARENT_TASK_ID"] = (
"$.Parameters.split_parent_task_id_%s" % node.split_parents[-1]
)
# Inherit the run id from the parent and pass it along to children.
attrs["metaflow.run_id.$"] = "$.Parameters.['metaflow.run_id']"
else:
# Set appropriate environment variables for runtime replacement.
if len(node.in_funcs) == 1:
input_paths = (
"sfn-${METAFLOW_RUN_ID}/%s/${METAFLOW_PARENT_TASK_ID}"
% node.in_funcs[0]
)
env["METAFLOW_PARENT_TASK_ID"] = "$.JobId"
# Inherit the run id from the parent and pass it along to children.
attrs["metaflow.run_id.$"] = "$.Parameters.['metaflow.run_id']"
else:
# Generate the input paths in a quasi-compressed format.
# See util.decompress_list for why this is written the way
# it is.
input_paths = "sfn-${METAFLOW_RUN_ID}:" + ",".join(
"/${METAFLOW_PARENT_%s_STEP}/"
"${METAFLOW_PARENT_%s_TASK_ID}" % (idx, idx)
for idx, _ in enumerate(node.in_funcs)
)
# Inherit the run id from the parent and pass it along to children.
attrs["metaflow.run_id.$"] = "$.[0].Parameters.['metaflow.run_id']"
# With several parents the state input is indexed per parent (`$.[idx]`).
for idx, _ in enumerate(node.in_funcs):
env["METAFLOW_PARENT_%s_TASK_ID" % idx] = "$.[%s].JobId" % idx
env["METAFLOW_PARENT_%s_STEP" % idx] = (
"$.[%s].Parameters.step_name" % idx
)
env["METAFLOW_INPUT_PATHS"] = input_paths
if node.is_inside_foreach:
# Set the task id of the parent job of the foreach split in
# our favorite dumping ground, the AWS Batch attrs. For
# subsequent descendent tasks, this attrs blob becomes the
# input to those descendent tasks. We set and propagate the
# task ids pointing to split_parents through every state.
if any(self.graph[n].type == "foreach" for n in node.in_funcs):
attrs["split_parent_task_id_%s.$" % node.split_parents[-1]] = (
"$.SplitParentTaskId"
)
for parent in node.split_parents[:-1]:
if self.graph[parent].type == "foreach":
attrs["split_parent_task_id_%s.$" % parent] = (
"$.Parameters.split_parent_task_id_%s" % parent
)
elif node.type == "join":
if self.graph[node.split_parents[-1]].type == "foreach":
# A foreach join only gets one set of input from the
# parent tasks. We filter the Map state to only output
# `$.[0]`, since we don't need any of the other outputs,
# that information is available to us from AWS DynamoDB.
# This has a nice side effect of making our foreach
# splits infinitely scalable because otherwise we would
# be bounded by the 32K state limit for the outputs. So,
# instead of referencing `Parameters` fields by index
# (like in `split`), we can just reference them
# directly.
attrs["split_parent_task_id_%s.$" % node.split_parents[-1]] = (
"$.Parameters.split_parent_task_id_%s"
% node.split_parents[-1]
)
for parent in node.split_parents[:-1]:
if self.graph[parent].type == "foreach":
attrs["split_parent_task_id_%s.$" % parent] = (
"$.Parameters.split_parent_task_id_%s" % parent
)
else:
for parent in node.split_parents:
if self.graph[parent].type == "foreach":
attrs["split_parent_task_id_%s.$" % parent] = (
"$.[0].Parameters.split_parent_task_id_%s" % parent
)
else:
for parent in node.split_parents:
if self.graph[parent].type == "foreach":
attrs["split_parent_task_id_%s.$" % parent] = (
"$.Parameters.split_parent_task_id_%s" % parent
)
# Set `METAFLOW_SPLIT_PARENT_TASK_ID_FOR_FOREACH_JOIN` if the
# next transition is to a foreach join, so that the
# stepfunctions decorator can write the mapping for input path
# to DynamoDb.
if any(
self.graph[n].type == "join"
and self.graph[self.graph[n].split_parents[-1]].type == "foreach"
for n in node.out_funcs
):
env["METAFLOW_SPLIT_PARENT_TASK_ID_FOR_FOREACH_JOIN"] = attrs[
"split_parent_task_id_%s.$"
% self.graph[node.out_funcs[0]].split_parents[-1]
]
# Set ttl for the values we set in AWS DynamoDB.
if node.type == "foreach":
if self.workflow_timeout:
env["METAFLOW_SFN_WORKFLOW_TIMEOUT"] = self.workflow_timeout
# Handle split index for for-each.
if any(self.graph[n].type == "foreach" for n in node.in_funcs):
env["METAFLOW_SPLIT_INDEX"] = "$.Index"
env["METAFLOW_CODE_URL"] = self.code_package_url
env["METAFLOW_FLOW_NAME"] = attrs["metaflow.flow_name"]
env["METAFLOW_STEP_NAME"] = attrs["metaflow.step_name"]
env["METAFLOW_RUN_ID"] = attrs["metaflow.run_id.$"]
env["METAFLOW_PRODUCTION_TOKEN"] = self.production_token
env["SFN_STATE_MACHINE"] = self.name
env["METAFLOW_OWNER"] = attrs["metaflow.owner"]
# Can't set `METAFLOW_TASK_ID` due to lack of run-scoped identifiers.
# We will instead rely on `AWS_BATCH_JOB_ID` as the task identifier.
# Can't set `METAFLOW_RETRY_COUNT` either due to integer casting issue.
metadata_env = self.metadata.get_runtime_environment("step-functions")
env.update(metadata_env)
# Despite its name, `metaflow_version` holds the whole environment info
# mapping, extended with the flow name and production token.
metaflow_version = self.environment.get_environment_info()
metaflow_version["flow_name"] = self.graph.name
metaflow_version["production_token"] = self.production_token
env["METAFLOW_VERSION"] = json.dumps(metaflow_version)
# map config values
cfg_env = {param["name"]: param["kv_name"] for param in self.config_parameters}
if cfg_env:
env["METAFLOW_FLOW_CONFIG_VALUE"] = json.dumps(cfg_env)
# Set AWS DynamoDb Table Name for state tracking for for-eaches.
# There are three instances when metaflow runtime directly interacts
# with AWS DynamoDB.
# 1. To set the cardinality of `foreach`s (which are subsequently
# read prior to the instantiation of the Map state by AWS Step
# Functions).
# 2. To set the input paths from the parent steps of a foreach join.
# 3. To read the input paths in a foreach join.
if (
node.type == "foreach"
or (
node.is_inside_foreach
and any(
self.graph[n].type == "join"
and self.graph[self.graph[n].split_parents[-1]].type == "foreach"
for n in node.out_funcs
)
)
or (
node.type == "join"
and self.graph[node.split_parents[-1]].type == "foreach"
)
):
if SFN_DYNAMO_DB_TABLE is None:
raise StepFunctionsException(
"An AWS DynamoDB table is needed "
"to support foreach in your flow. "
"You can create one following the "
"instructions listed at *https://a"
"dmin-docs.metaflow.org/metaflow-o"
"n-aws/deployment-guide/manual-dep"
"loyment#scheduling* and "
"re-configure Metaflow using "
"*metaflow configure aws* on your "
"terminal."
)
env["METAFLOW_SFN_DYNAMO_DB_TABLE"] = SFN_DYNAMO_DB_TABLE
# It makes no sense to set env vars to None (shows up as "None" string)
env = {k: v for k, v in env.items() if v is not None}
# Resolve AWS Batch resource requirements.
# NOTE: indexing with [0] assumes the step carries a `batch` decorator; an
# IndexError is raised otherwise.
batch_deco = [deco for deco in node.decorators if deco.name == "batch"][0]
resources = {}
resources.update(batch_deco.attributes)
# Resolve retry strategy.
user_code_retries, total_retries = self._get_retries(node)
task_spec = {
"flow_name": attrs["metaflow.flow_name"],
"step_name": attrs["metaflow.step_name"],
"run_id": "sfn-$METAFLOW_RUN_ID",
# Use AWS Batch job identifier as the globally unique
# task identifier.
"task_id": "$AWS_BATCH_JOB_ID",
# Since retries are handled by AWS Batch, we can rely on
# AWS_BATCH_JOB_ATTEMPT as the job counter.
"retry_count": "$((AWS_BATCH_JOB_ATTEMPT-1))",
}
# merge batch tags supplied through step-functions CLI and ones defined in decorator
# (on a key collision the decorator's value wins, as it is unpacked last)
batch_tags = {**self.aws_batch_tags, **resources["aws_batch_tags"]}
return (
Batch(self.metadata, self.environment, self.flow_datastore)
.create_job(
step_name=node.name,
step_cli=self._step_cli(
node, input_paths, self.code_package_url, user_code_retries
),
task_spec=task_spec,
code_package_metadata=self.code_package_metadata,
code_package_sha=self.code_package_sha,
code_package_url=self.code_package_url,
code_package_ds=self.flow_datastore.TYPE,
image=resources["image"],
queue=resources["queue"],
iam_role=resources["iam_role"],
execution_role=resources["execution_role"],
cpu=resources["cpu"],
gpu=resources["gpu"],
memory=resources["memory"],
run_time_limit=batch_deco.run_time_limit,
shared_memory=resources["shared_memory"],
max_swap=resources["max_swap"],
swappiness=resources["swappiness"],
efa=resources["efa"],
use_tmpfs=resources["use_tmpfs"],
aws_batch_tags=batch_tags,
tmpfs_tempdir=resources["tmpfs_tempdir"],
tmpfs_size=resources["tmpfs_size"],
tmpfs_path=resources["tmpfs_path"],
inferentia=resources["inferentia"],
env=env,
attrs=attrs,
host_volumes=resources["host_volumes"],
efs_volumes=resources["efs_volumes"],
ephemeral_storage=resources["ephemeral_storage"],
log_driver=resources["log_driver"],
log_options=resources["log_options"],
offload_command_to_s3=self.compress_state_machine,
privileged=resources["privileged"],
)
# One initial attempt plus every allowed retry.
.attempts(total_retries + 1)
)
def _get_retries(self, node):
max_user_code_retries = 0
max_error_retries = 0
# Different decorators may have different retrying strategies, so take
# the max of them.
for deco in node.decorators:
user_code_retries, error_retries = deco.step_task_retry_count()
max_user_code_retries = max(max_user_code_retries, user_code_retries)
max_error_retries = max(max_error_retries, error_retries)
return max_user_code_retries, max_user_code_retries + max_error_retries
def _step_cli(self, node, paths, code_package_url, user_code_retries):
cmds = []
script_name = os.path.basename(sys.argv[0])
executable = self.environment.executable(node.name)
if R.use_r():
entrypoint = [R.entrypoint()]
else:
entrypoint = [executable, script_name]
# Use AWS Batch job identifier as the globally unique task identifier.
task_id = "${AWS_BATCH_JOB_ID}"
top_opts_dict = {
"with": [
decorator.make_decorator_spec()
for decorator in node.decorators
if not decorator.statically_defined and decorator.inserted_by is None
]
}
# FlowDecorators can define their own top-level options. They are
# responsible for adding their own top-level options and values through
# the get_top_level_options() hook. See similar logic in runtime.py.
for deco in flow_decorators(self.flow):
top_opts_dict.update(deco.get_top_level_options())
top_opts = list(dict_to_cli_options(top_opts_dict))
top_level = top_opts + [
"--quiet",
"--metadata=%s" % self.metadata.TYPE,
"--environment=%s" % self.environment.TYPE,
"--datastore=%s" % self.flow_datastore.TYPE,
"--datastore-root=%s" % self.flow_datastore.datastore_root,
"--event-logger=%s" % self.event_logger.TYPE,
"--monitor=%s" % self.monitor.TYPE,
"--no-pylint",
"--with=step_functions_internal",
]
if node.name == "start":
# We need a separate unique ID for the special _parameters task
task_id_params = "%s-params" % task_id
# Export user-defined parameters into runtime environment
param_file = "".join(
random.choice(string.ascii_lowercase) for _ in range(10)
)
export_params = (
"python -m "
"metaflow.plugins.aws.step_functions.set_batch_environment "
"parameters %s && . `pwd`/%s" % (param_file, param_file)
)
params = (
entrypoint
+ top_level
+ [
"init",
"--run-id sfn-$METAFLOW_RUN_ID",
"--task-id %s" % task_id_params,
]
)
# Assign tags to run objects.
if self.tags:
params.extend("--tag %s" % tag for tag in self.tags)
# If the start step gets retried, we must be careful not to
# regenerate multiple parameters tasks. Hence, we check first if
# _parameters exists already.
exists = entrypoint + [
"dump",
"--max-value-size=0",
"sfn-${METAFLOW_RUN_ID}/_parameters/%s" % (task_id_params),
]
cmd = "if ! %s >/dev/null 2>/dev/null; then %s && %s; fi" % (
" ".join(exists),
export_params,
" ".join(params),
)
cmds.append(cmd)
paths = "sfn-${METAFLOW_RUN_ID}/_parameters/%s" % (task_id_params)
if node.type == "join" and self.graph[node.split_parents[-1]].type == "foreach":
parent_tasks_file = "".join(
random.choice(string.ascii_lowercase) for _ in range(10)
)
export_parent_tasks = (
"python -m "
"metaflow.plugins.aws.step_functions.set_batch_environment "
"parent_tasks %s && . `pwd`/%s" % (parent_tasks_file, parent_tasks_file)
)
cmds.append(export_parent_tasks)
step = [
"step",
node.name,
"--run-id sfn-$METAFLOW_RUN_ID",
"--task-id %s" % task_id,
# Since retries are handled by AWS Batch, we can rely on
# AWS_BATCH_JOB_ATTEMPT as the job counter.
"--retry-count $((AWS_BATCH_JOB_ATTEMPT-1))",
"--max-user-code-retries %d" % user_code_retries,
"--input-paths %s" % paths,
]
if any(self.graph[n].type == "foreach" for n in node.in_funcs):
# We set the `METAFLOW_SPLIT_INDEX` through JSONPath-foo
# to pass the state from the parent DynamoDb state for for-each.
step.append("--split-index $METAFLOW_SPLIT_INDEX")
if self.tags:
step.extend("--tag %s" % tag for tag in self.tags)
if self.namespace is not None:
step.append("--namespace=%s" % self.namespace)
cmds.append(" ".join(entrypoint + top_level + step))
return " && ".join(cmds)
class Workflow(object):
    """Fluent builder for a state machine (or sub-workflow) document.

    All setters write into ``payload``, an auto-vivifying nested dictionary,
    and return the builder so calls can be chained. ``to_json`` serialises
    the accumulated payload.
    """

    def __init__(self, name):
        def _autodict():
            # Unknown keys materialise as further nested dicts on first access.
            return defaultdict(_autodict)

        self.name = name
        self.payload = _autodict()

    def _put(self, key, value):
        # Store one top-level field and hand the builder back for chaining.
        self.payload[key] = value
        return self

    def mode(self, mode):
        """Set ``ProcessorConfig``; ``DISTRIBUTED`` also sets ``ExecutionType`` to ``STANDARD``."""
        processor_config = {"Mode": mode}
        if mode == "DISTRIBUTED":
            processor_config["ExecutionType"] = "STANDARD"
        return self._put("ProcessorConfig", processor_config)

    def start_at(self, start_at):
        """Set the ``StartAt`` field."""
        return self._put("StartAt", start_at)

    def add_state(self, state):
        """Register ``state.payload`` under ``States[state.name]``."""
        states = self.payload["States"]
        states[state.name] = state.payload
        return self

    def timeout_seconds(self, timeout_seconds):
        """Set the ``TimeoutSeconds`` field."""
        return self._put("TimeoutSeconds", timeout_seconds)

    def to_json(self, pretty=False):
        """Serialise the payload to JSON, indented by four spaces when ``pretty``."""
        if pretty:
            return json.dumps(self.payload, indent=4)
        return json.dumps(self.payload, indent=None)
class State(object):
    """Fluent builder for a ``Task`` state.

    ``payload`` is an auto-vivifying nested dictionary that starts out with
    ``Type`` set to ``"Task"``. Every public setter returns the builder so
    calls can be chained.
    """

    def __init__(self, name):
        def _autodict():
            # Unknown keys materialise as further nested dicts on first access.
            return defaultdict(_autodict)

        self.name = name
        self.payload = _autodict()
        self.payload["Type"] = "Task"

    def _put(self, key, value):
        # Store one top-level field and hand the builder back for chaining.
        self.payload[key] = value
        return self

    def resource(self, resource):
        """Set the ``Resource`` field."""
        return self._put("Resource", resource)

    def next(self, state):
        """Set the ``Next`` field to the given state name."""
        return self._put("Next", state)

    def end(self):
        """Mark this state as terminal (``End`` = true)."""
        return self._put("End", True)

    def parameter(self, name, value):
        """Set one entry of the ``Parameters`` mapping."""
        parameters = self.payload["Parameters"]
        parameters[name] = value
        return self

    def output_path(self, output_path):
        """Set the ``OutputPath`` field."""
        return self._put("OutputPath", output_path)

    def result_path(self, result_path):
        """Set the ``ResultPath`` field."""
        return self._put("ResultPath", result_path)

    def result_selector(self, name, value):
        """Set one entry of the ``ResultSelector`` mapping."""
        selector = self.payload["ResultSelector"]
        selector[name] = value
        return self

    def _partition(self):
        # The ARN partition is taken from the configured IAM role so that
        # AWS GovCloud and AWS China regions are supported.
        return SFN_IAM_ROLE.split(":")[1]

    def retry_strategy(self, retry_strategy):
        """Set ``Retry`` to a single-element list holding ``retry_strategy``."""
        return self._put("Retry", [retry_strategy])

    def batch(self, job):
        """Configure this state to submit ``job`` to AWS Batch and wait for it.

        Fields are copied from ``job.payload``; container overrides, retry
        strategy and timeout are passed through ``to_pascalcase`` first.
        """
        self.resource("arn:%s:states:::batch:submitJob.sync" % self._partition())
        self.parameter("JobDefinition", job.payload["jobDefinition"])
        self.parameter("JobName", job.payload["jobName"])
        self.parameter("JobQueue", job.payload["jobQueue"])
        self.parameter("Parameters", job.payload["parameters"])
        self.parameter(
            "ContainerOverrides", to_pascalcase(job.payload["containerOverrides"])
        )
        self.parameter("RetryStrategy", to_pascalcase(job.payload["retryStrategy"]))
        self.parameter("Timeout", to_pascalcase(job.payload["timeout"]))
        # Tags are optional on the job payload.
        if "tags" in job.payload:
            self.parameter("Tags", job.payload["tags"])
        # Retry the job *submission* itself: the measly 50 jobs / second
        # queue admission limit of AWS Batch is easy to hit.
        submission_retry = {
            "ErrorEquals": ["Batch.AWSBatchException"],
            "BackoffRate": 2,
            "IntervalSeconds": 2,
            "MaxDelaySeconds": 60,
            "MaxAttempts": 10,
            "JitterStrategy": "FULL",
        }
        self.retry_strategy(submission_retry)
        return self

    def dynamo_db(self, table_name, primary_key, values):
        """Configure this state as a consistent DynamoDB ``getItem`` lookup.

        ``primary_key`` is used as the ``S.$`` value of the ``pathspec`` key
        and ``values`` becomes the projection expression.
        """
        self.resource("arn:%s:states:::dynamodb:getItem" % self._partition())
        self.parameter("TableName", table_name)
        self.parameter("Key", {"pathspec": {"S.$": primary_key}})
        self.parameter("ConsistentRead", True)
        self.parameter("ProjectionExpression", values)
        return self
class Pass(object):
    """Fluent builder for a ``Pass`` state.

    ``payload`` is an auto-vivifying nested dictionary that starts out with
    ``Type`` set to ``"Pass"``; every setter returns the builder.
    """

    def __init__(self, name):
        def _autodict():
            # Unknown keys materialise as further nested dicts on first access.
            return defaultdict(_autodict)

        self.name = name
        self.payload = _autodict()
        self.payload["Type"] = "Pass"

    def _put(self, key, value):
        # Store one top-level field and hand the builder back for chaining.
        self.payload[key] = value
        return self

    def end(self):
        """Mark this state as terminal (``End`` = true)."""
        return self._put("End", True)

    def parameter(self, name, value):
        """Set one entry of the ``Parameters`` mapping."""
        parameters = self.payload["Parameters"]
        parameters[name] = value
        return self

    def output_path(self, output_path):
        """Set the ``OutputPath`` field."""
        return self._put("OutputPath", output_path)
class Parallel(object):
    """Fluent builder for a ``Parallel`` state.

    ``payload`` is an auto-vivifying nested dictionary that starts out with
    ``Type`` set to ``"Parallel"``; every setter returns the builder.
    """

    def __init__(self, name):
        def _autodict():
            # Unknown keys materialise as further nested dicts on first access.
            return defaultdict(_autodict)

        self.name = name
        self.payload = _autodict()
        self.payload["Type"] = "Parallel"

    def _put(self, key, value):
        # Store one top-level field and hand the builder back for chaining.
        self.payload[key] = value
        return self

    def branch(self, workflow):
        """Append ``workflow.payload`` to ``Branches``, creating the list on first use."""
        # setdefault keeps Branches a plain list instead of letting the
        # auto-vivifying payload create a nested dict for it.
        self.payload.setdefault("Branches", []).append(workflow.payload)
        return self

    def next(self, state):
        """Set the ``Next`` field to the given state name."""
        return self._put("Next", state)

    def output_path(self, output_path):
        """Set the ``OutputPath`` field."""
        return self._put("OutputPath", output_path)

    def result_path(self, result_path):
        """Set the ``ResultPath`` field."""
        return self._put("ResultPath", result_path)
class Map(object):
    """Fluent builder for a ``Map`` state.

    ``payload`` is an auto-vivifying nested dictionary that starts out with
    ``Type`` set to ``"Map"`` and ``MaxConcurrency`` set to 0. Every setter
    returns the builder so calls can be chained.
    """

    def __init__(self, name):
        self.name = name
        tree = lambda: defaultdict(tree)
        self.payload = tree()
        self.payload["Type"] = "Map"
        # NOTE(review): in Amazon States Language a MaxConcurrency of 0 places
        # no limit on parallel iterations - confirm against the ASL reference.
        self.payload["MaxConcurrency"] = 0

    def iterator(self, workflow):
        """Use ``workflow.payload`` as the ``Iterator`` run for each item."""
        self.payload["Iterator"] = workflow.payload
        return self

    def next(self, state):
        """Set the ``Next`` field to the given state name."""
        self.payload["Next"] = state
        return self

    def items_path(self, items_path):
        """Set the ``ItemsPath`` field."""
        self.payload["ItemsPath"] = items_path
        return self

    def parameter(self, name, value):
        """Set one entry of the ``Parameters`` mapping."""
        self.payload["Parameters"][name] = value
        return self

    def max_concurrency(self, max_concurrency):
        """Override the ``MaxConcurrency`` field."""
        self.payload["MaxConcurrency"] = max_concurrency
        return self

    def output_path(self, output_path):
        """Set the ``OutputPath`` field."""
        self.payload["OutputPath"] = output_path
        return self

    def result_path(self, result_path):
        """Set the ``ResultPath`` field."""
        self.payload["ResultPath"] = result_path
        return self

    def item_reader(self, item_reader):
        """Use ``item_reader.payload`` as the ``ItemReader`` field."""
        self.payload["ItemReader"] = item_reader.payload
        return self

    def _partition(self):
        # Derive the ARN partition from the configured IAM role, exactly as
        # State._partition does, so AWS GovCloud and AWS China regions work.
        return SFN_IAM_ROLE.split(":")[1]

    def result_writer(self, bucket, prefix):
        """Write the map results to S3 under ``bucket``/``prefix``.

        Nothing is configured when either argument is ``None``. The resource
        ARN uses the partition of ``SFN_IAM_ROLE`` rather than a hard-coded
        ``aws``, which would be invalid in the GovCloud and China partitions.
        """
        if bucket is not None and prefix is not None:
            self.payload["ResultWriter"] = {
                "Resource": "arn:%s:states:::s3:putObject" % self._partition(),
                "Parameters": {
                    "Bucket": bucket,
                    "Prefix": prefix,
                },
            }
        return self
class JSONItemReader(object):
    """Fluent builder for an item-reader payload that reads JSON input.

    ``payload`` is an auto-vivifying nested dictionary whose ``ReaderConfig``
    starts out as ``{"InputType": "JSON", "MaxItems": 1}``; every setter
    returns the builder.
    """

    def __init__(self):
        def _autodict():
            # Unknown keys materialise as further nested dicts on first access.
            return defaultdict(_autodict)

        self.payload = _autodict()
        self.payload["ReaderConfig"] = {"InputType": "JSON", "MaxItems": 1}

    def _put(self, key, value):
        # Store one top-level field and hand the builder back for chaining.
        self.payload[key] = value
        return self

    def resource(self, resource):
        """Set the ``Resource`` field."""
        return self._put("Resource", resource)

    def parameter(self, name, value):
        """Set one entry of the ``Parameters`` mapping."""
        parameters = self.payload["Parameters"]
        parameters[name] = value
        return self

    def output_path(self, output_path):
        """Set the ``OutputPath`` field."""
        return self._put("OutputPath", output_path)
================================================
FILE: metaflow/plugins/aws/step_functions/step_functions_cli.py
================================================
import base64
import json
import re
from hashlib import sha1
from metaflow import JSONType, current, decorators, parameters
from metaflow._vendor import click
from metaflow.exception import MetaflowException, MetaflowInternalError
from metaflow.metaflow_config import (
FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
SERVICE_VERSION_CHECK,
SFN_STATE_MACHINE_PREFIX,
SFN_COMPRESS_STATE_MACHINE,
UI_URL,
)
from metaflow.package import MetaflowPackage
from metaflow.plugins.aws.batch.batch_decorator import BatchDecorator
from metaflow.tagging_util import validate_tags
from metaflow.util import get_username, to_bytes, to_unicode, version_parse
from .production_token import load_token, new_token, store_token
from .step_functions import StepFunctions
from metaflow.tagging_util import validate_tags
from ..aws_utils import validate_aws_tag
# Despite its name, this pattern matches a single character that is NOT
# allowed in a state machine name, i.e. anything other than ASCII letters,
# digits, "_", "-" and ".". resolve_state_machine_name() rejects a --name
# for which VALID_NAME.search() finds a match.
VALID_NAME = re.compile(r"[^a-zA-Z0-9_\-\.]")
class IncorrectProductionToken(MetaflowException):
    """Metaflow exception reported under the headline "Incorrect production token"."""

    headline = "Incorrect production token"
class RunIdMismatch(MetaflowException):
    """Metaflow exception reported under the headline "Run ID mismatch"."""

    headline = "Run ID mismatch"
class IncorrectMetadataServiceVersion(MetaflowException):
    """Raised by check_metadata_service_version() when the metadata service
    reports a version that is neither "local" nor at least 2.0.2."""

    headline = "Incorrect version for metaflow service"
class StepFunctionsStateMachineNameTooLong(MetaflowException):
    """Metaflow exception reported under the headline
    "AWS Step Functions state machine name too long".

    NOTE(review): presumably raised by resolve_state_machine_name() when the
    resolved name exceeds 80 characters - confirm at the raise site.
    """

    headline = "AWS Step Functions state machine name too long"
# Root click group of this module; the `step_functions` command group defined
# below is attached to it via @cli.group.
# Documented with a comment on purpose: no help= is passed to click.group(),
# so a docstring here would be picked up by click as the group's help text.
@click.group()
def cli():
    pass
@cli.group(help="Commands related to AWS Step Functions.")
@click.option(
    "--name",
    type=str,
    default=None,
    help="State Machine name. The flow name is used instead "
    "if this option is not specified",
)
@click.pass_obj
def step_functions(obj, name=None):
    """Validate the flow and resolve the state machine identity.

    Runs ``obj.check`` on the flow graph, then stores the state machine name,
    the token prefix and the is-project flag returned by
    ``resolve_state_machine_name`` on ``obj`` for the sub-commands to use.
    """
    obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
    resolved = resolve_state_machine_name(obj, name)
    # Unpacking happens before any attribute is assigned, so a malformed
    # result leaves obj untouched.
    obj.state_machine_name, obj.token_prefix, obj.is_project = resolved
@step_functions.command(
    help="Deploy a new version of this workflow to AWS Step Functions."
)
@click.option(
    "--authorize",
    default=None,
    help="Authorize using this production token. You need this "
    "when you are re-deploying an existing flow for the first "
    "time. The token is cached in METAFLOW_HOME, so you only "
    "need to specify this once.",
)
@click.option(
    "--generate-new-token",
    is_flag=True,
    help="Generate a new production token for this flow. "
    "This will move the production flow to a new "
    "namespace.",
)
@click.option(
    "--new-token",
    "given_token",
    default=None,
    help="Use the given production token for this flow. "
    "This will move the production flow to the given "
    "namespace.",
)
@click.option(
    "--tag",
    "tags",
    multiple=True,
    default=None,
    help="Annotate all objects produced by AWS Step Functions runs "
    "with the given tag. You can specify this option multiple "
    "times to attach multiple tags.",
)
@click.option(
    "--aws-batch-tag",
    "aws_batch_tags",
    multiple=True,
    default=None,
    help="AWS Batch tags.",
)
@click.option(
    "--namespace",
    "user_namespace",
    default=None,
    help="Change the namespace from the default (production token) "
    "to the given tag. See run --help for more information.",
)
@click.option(
    "--only-json",
    is_flag=True,
    default=False,
    help="Only print out JSON sent to AWS Step Functions. Do not deploy anything.",
)
@click.option(
    "--max-workers",
    default=100,
    show_default=True,
    help="Maximum number of parallel processes.",
)
@click.option(
    "--workflow-timeout",
    default=None,
    type=int,
    help="Workflow timeout in seconds.",
)
@click.option(
    "--log-execution-history",
    is_flag=True,
    help="Log AWS Step Functions execution history to AWS CloudWatch "
    "Logs log group.",
)
@click.option(
    "--use-distributed-map/--no-use-distributed-map",
    is_flag=True,
    help="Use AWS Step Functions Distributed Map instead of Inline Map for "
    "defining foreach tasks in Amazon State Language.",
)
@click.option(
    "--compress-state-machine/--no-compress-state-machine",
    is_flag=True,
    default=SFN_COMPRESS_STATE_MACHINE,
    help="Compress AWS Step Functions state machine to fit within the 8K limit.",
)
@click.option(
    "--deployer-attribute-file",
    default=None,
    show_default=True,
    type=str,
    help="Write the workflow name to the file specified. Used internally for Metaflow's Deployer API.",
    hidden=True,
)
@click.pass_obj
def create(
    obj,
    tags=None,
    aws_batch_tags=None,
    user_namespace=None,
    only_json=False,
    authorize=None,
    generate_new_token=False,
    given_token=None,
    max_workers=None,
    workflow_timeout=None,
    log_execution_history=False,
    use_distributed_map=False,
    compress_state_machine=False,
    deployer_attribute_file=None,
):
    """Compile the flow into a state machine and deploy it (or print its JSON).

    Steps, in order: reject flows containing a ``slurm`` step, validate the
    tags, optionally record deployment attributes in
    ``deployer_attribute_file``, optionally check the metadata service
    version, resolve the production token, and build the flow. With
    ``only_json`` the state machine JSON is printed and nothing is deployed;
    otherwise the flow is deployed, scheduled, and its trigger explained.

    Raises:
        MetaflowException: if any step carries a ``slurm`` decorator.
    """
    # Slurm-backed steps cannot be executed through AWS Step Functions.
    for step in obj.graph:
        for deco in step.decorators:
            if deco.name == "slurm":
                raise MetaflowException(
                    "Step *%s* is marked for execution on Slurm with AWS Step Functions which isn't currently supported."
                    % step.name
                )

    validate_tags(tags)

    if deployer_attribute_file:
        # The file is opened first and the attributes gathered afterwards,
        # mirroring the order in which failures can surface.
        with open(deployer_attribute_file, "w") as attribute_fh:
            deployment_attributes = {
                "name": obj.state_machine_name,
                "flow_name": obj.flow.name,
                "metadata": obj.metadata.metadata_str(),
            }
            json.dump(deployment_attributes, attribute_fh)

    announcement = "Deploying *%s* to AWS Step Functions..." % obj.state_machine_name
    obj.echo(announcement, bold=True)

    if SERVICE_VERSION_CHECK:
        check_metadata_service_version(obj)

    token = resolve_token(
        obj.state_machine_name,
        obj.token_prefix,
        obj,
        authorize,
        given_token,
        generate_new_token,
        obj.is_project,
    )

    flow = make_flow(
        obj,
        token,
        obj.state_machine_name,
        tags,
        aws_batch_tags,
        user_namespace,
        max_workers,
        workflow_timeout,
        obj.is_project,
        use_distributed_map,
        compress_state_machine,
    )

    if only_json:
        # Dry run: emit the state machine definition and stop.
        obj.echo_always(flow.to_json(), err=False, no_bold=True)
        return

    flow.deploy(log_execution_history)
    success_template = (
        "State Machine *{state_machine}* "
        "for flow *{name}* pushed to "
        "AWS Step Functions successfully.\n"
    )
    obj.echo(
        success_template.format(
            state_machine=obj.state_machine_name, name=current.flow_name
        ),
        bold=True,
    )
    if obj._is_state_machine_name_hashed:
        obj.echo(
            "Note that the flow was deployed with a truncated name "
            "due to a length limit on AWS Step Functions. The "
            "original long name is stored in task metadata.\n"
        )
    flow.schedule()
    obj.echo("What will trigger execution of the workflow:", bold=True)
    obj.echo(flow.trigger_explanation(), indent=True)
def check_metadata_service_version(obj):
    """Ensure the metadata service is recent enough for AWS Step Functions.

    Returns silently when the reported version is ``"local"`` or parses to at
    least 2.0.2. Otherwise upgrade instructions are echoed through
    ``obj.echo`` and an error is raised.

    Raises:
        IncorrectMetadataServiceVersion: if the version is ``None`` or older
            than 2.0.2.
    """
    service_version = obj.metadata.version()
    if service_version == "local":
        return
    # The Metaflow metadata service must be at version 2.0.2 or newer.
    if service_version is not None and version_parse(
        service_version
    ) >= version_parse("2.0.2"):
        return

    obj.echo("")
    obj.echo(
        "You are running a version of the metaflow service "
        "that currently doesn't support AWS Step Functions. "
    )
    obj.echo(
        "For more information on how to upgrade your "
        "service to a compatible version (>= 2.0.2), visit:"
    )
    obj.echo(
        "    https://docs.outerbounds.com/engineering/operations/migration/",
        fg="green",
    )
    obj.echo(
        "Once you have upgraded your metadata service, please "
        "re-execute your command."
    )
    raise IncorrectMetadataServiceVersion(
        "Try again with a more recent version of metaflow service (>=2.0.2)."
    )
def resolve_state_machine_name(obj, name):
def attach_prefix(name: str):
if SFN_STATE_MACHINE_PREFIX is not None and (
not name.startswith(SFN_STATE_MACHINE_PREFIX)
):
return SFN_STATE_MACHINE_PREFIX + "_" + name
return name
project = current.get("project_name")
obj._is_state_machine_name_hashed = False
if project:
if name:
raise MetaflowException(
"--name is not supported for @projects. " "Use --branch instead."
)
state_machine_name = attach_prefix(current.project_flow_name)
project_branch = to_bytes(".".join((project, current.branch_name)))
token_prefix = (
"mfprj-%s"
% to_unicode(base64.b32encode(sha1(project_branch).digest()))[:16]
)
is_project = True
# AWS Step Functions has a limit of 80 chars for state machine names.
# We truncate the state machine name if the computed name is greater
# than 60 chars and append a hashed suffix to ensure uniqueness.
if len(state_machine_name) > 60:
name_hash = to_unicode(
base64.b32encode(sha1(to_bytes(state_machine_name)).digest())
)[:16].lower()
state_machine_name = "%s-%s" % (state_machine_name[:60], name_hash)
obj._is_state_machine_name_hashed = True
else:
if name and VALID_NAME.search(name):
raise MetaflowException("Name '%s' contains invalid characters." % name)
state_machine_name = attach_prefix(name if name else current.flow_name)
token_prefix = state_machine_name
is_project = False
if len(state_machine_name) > 80:
msg = (
"The full name of the workflow:\n*%s*\nis longer than 80 "
"characters.\n\n"
"To deploy this workflow to AWS Step Functions, please "
"assign a shorter name\nusing the option\n"
"*step-functions --name