Repository: geohot/lolrecaptcha
Branch: master
Commit: 563a8ae8ff23
Files: 5
Total size: 9.0 KB

Directory structure:
gitextract_at3qed7a/
├── .gitignore
├── LICENSE
├── README
├── fetch.go
└── loader.go

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.*.swp
imgs
dst.png

================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2016

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

================================================
FILE: README
================================================
Time to learn the go language.
We crack the recaptcha because it's Christmas.
We want the Merry Christmas for all!

Sorry I am golang noob and code is low quality and shit
Will eventually use handcoded low quality convnet maybe

Anyone have clever ideas to get ground truth?
Maybe we unsupervised cluster and then write a paper Because to date that will be the #1 use of unsupervised learning I go on some dates in NYC tomorrow so maybe we finish this today? --> Probs not dawg it takes 2 hours in golang what takes you 5 minutes in Python --> Maybe breaking recaptcha will be a fun date activity? Will ask. Very cheap! Goal is breaking the demo @ https://www.google.com/recaptcha/api2/demo Also I realize clicking street signs is probably training the Google Self Driving Car Cheaters. == Project Updates == Christmas afternoon: * Lots of people in my house. It is loud. The fetcher is working. Christmas night: * Been in battle to get more data. Google loves data and hates sharing. Got blocked. * Made threads to get fast data. I now have 50842 alleged street sign pictures. * They are alleged only though. We need to train a binary classifier. Morning after christmas: * Got bagels. I love bagels == Project Bullshit == * WE ARE ON HACKER NEWS https://news.ycombinator.com/item?id=13256266 * OMG THIS IS LITERALLY A BIGGER JOKE THAN SHIA LABEOUF * (jkjkjk Shia LaBeouf is way more famous than I will ever be) DyingLlama is my hero, I found his youtube last night and got inspired https://www.youtube.com/channel/UC88oKpyXNid09t1m_PZlvfQ ================================================ FILE: fetch.go ================================================ package main import ( "bytes" "crypto/md5" "encoding/hex" "fmt" "github.com/disintegration/imaging" "golang.org/x/net/html" "image" "image/jpeg" "image/png" "io" "io/ioutil" "log" "math/rand" "net/http" "net/url" "os" "strings" "time" ) const apiKey string = "6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-" func fetchImg(ck string) image.Image { // fetch the image u, err := url.Parse("http://google.com/recaptcha/api2/payload") if err != nil { log.Fatal(err) } q := u.Query() q.Set("c", ck) q.Set("k", apiKey) u.RawQuery = q.Encode() // do fetch imgresponse, err := http.Get(u.String()) if err != nil { log.Fatal(err) } img, 
err := jpeg.Decode(imgresponse.Body) if err != nil { log.Fatal(err) } return img } func getChallengeKey() (string, string, image.Image) { // build the request u, err := url.Parse("http://google.com/recaptcha/api/fallback") if err != nil { log.Fatal(err) } q := u.Query() q.Set("k", apiKey) u.RawQuery = q.Encode() //fmt.Println(u) // fetch the webpage response, err := http.Get(u.String()) if err != nil { log.Fatal(err) } defer response.Body.Close() // print it bodyBytes, _ := ioutil.ReadAll(response.Body) z := html.NewTokenizer(ioutil.NopCloser(bytes.NewBuffer(bodyBytes))) tmparr := []string{} ck := "" for { tt := z.Next() switch tt { case html.ErrorToken: return ck, tmparr[3], fetchImg(ck) case html.StartTagToken, html.SelfClosingTagToken: tn, attr := z.TagName() if string(tn) == "img" && attr { for { k, v, attr := z.TagAttr() if string(k) == "src" { //fmt.Println(string(v)) u, err := url.Parse(string(v)) if err != nil { log.Fatal(err) } q := u.Query() //fmt.Println(q) if q["k"][0] != apiKey { log.Fatal("apiKey doesn't match") } ck = q["c"][0] } if !attr { break } } } case html.TextToken: //fmt.Println(z.Token()) tmparr = append(tmparr, z.Token().String()) } } } func downloader() { bigcnt := 0 for { // parse it ck, typ, img := getChallengeKey() h := md5.New() io.WriteString(h, ck) hh := hex.EncodeToString(h.Sum(nil)) typ = strings.Replace(typ, " ", "_", -1) //fmt.Println(ck, typ, img.Bounds()) fmt.Println(bigcnt, hh, typ, img.Bounds()) if img.Bounds() != image.Rect(0, 0, 300, 300) { log.Fatal("IMAGE IS THE WRONG SIZE") } // write it os.MkdirAll("imgs/"+typ, 0755) cnt := 0 for h := 0; h < 300; h += 100 { for w := 0; w < 300; w += 100 { lilimg := imaging.Crop(img, image.Rect(w, h, w+100, h+100)) fn := fmt.Sprintf("imgs/%s/%s_%d.png", typ, hh, cnt) f, err := os.OpenFile(fn, os.O_CREATE|os.O_WRONLY, 0644) if err != nil { log.Fatal(err) } png.Encode(f, lilimg) f.Close() cnt += 1 } } bigcnt += 1 time.Sleep(time.Duration(rand.Intn(2000)) * time.Millisecond) } } func main() 
{ fmt.Println("my first golang program") /*for i := 0; i < 8; i += 1 { go downloader() }*/ downloader() // move on fmt.Println("still alive!") } ================================================ FILE: loader.go ================================================ package main /* so like wow there's no neural networks for go, CNN anyone? idea is this and if you cheat and use python you are a big cheater give all alleged street sign images 0.4 of street sign and give all other images 0.01 chance of street sign and maybe with the magic of neural networks we will learn? TODO: don't be cheater and use python only golang pull request accepted */ import ( "fmt" "github.com/disintegration/gift" _ "github.com/disintegration/imaging" "image" "image/png" "log" "math/rand" "os" "path/filepath" "strings" //"github.com/NOX73/go-neural" //"github.com/NOX73/go-neural/learn" //"github.com/sajari/random-forest/RF" ) func randomArray(n int) []float32 { ret := make([]float32, n) for i := 0; i < n; i++ { ret[i] = (rand.Float32() - 0.5) * 5 } return ret } func main() { log.Print("use log so we don't have to put an underscore before the import") type Example struct { features []float32 category string } paths := make(chan string) processed := make(chan Example) // the Seed for the network is 7 rand.Seed(7) g := gift.New( // edge detector gift.Convolution( []float32{ -1, -1, -1, -1, 8, -1, -1, -1, -1, }, false, false, false, 0.0), // is this max pool? gift.Maximum(2, true), gift.Resize(50, 0, gift.LinearResampling), // random 5x5 conv, hmm but like the color channels bro this is a shit neural network gift.Convolution( randomArray(25), false, false, false, 0.0), // is this max pool? gift.Maximum(2, true), gift.Resize(25, 0, gift.LinearResampling), // random 3x3 conv, hmm but like the color channels bro this is a shit neural network gift.Convolution( randomArray(9), false, false, false, 0.0), // is this max pool? 
gift.Maximum(2, true), gift.Resize(10, 0, gift.LinearResampling), // 300 features one for each spartan RIP ) //n := neural.NewNetwork(300, []int{100,20,1}) //n.RandomizeSynapses() // forest builder go func() { // is this a proper design pattern? // probs not it's awkward ROS node shit for { sample := <-processed fmt.Println(sample) // ugh no inline if? /*prob := []float64{0.01} if sample.yes { prob = []float64{0.4} } learn.Learn(n, sample.features, prob, 0.05) println(prob[0], learn.Evaluation(n, sample.features, prob))*/ } }() // image loader and network runner go func() { for { path := <-paths // load the image, this is 5 lines // i hate all this error handling does go have exceptions? f, err := os.Open(path) if err != nil { log.Fatal(err, path) } img, err := png.Decode(f) if err != nil { log.Fatal(err, path) } f.Close() dst := image.NewRGBA(g.Bounds(img.Bounds())) g.Draw(dst, img) // extract features // i can write much better than this wow shit ret := make([]float32, 300) cnt := 0 for i := 0; i < 400; i++ { if i%4 == 3 { continue } ret[cnt] = float32(dst.Pix[i]) / 256.0 cnt += 1 } processed <- Example{features: ret, category: strings.Split(path, "/")[1]} //imaging.Save(dst, "dst.png") //println(dst) } }() files := []string{} filepath.Walk("imgs/", func(path string, finfo os.FileInfo, err error) error { if finfo.IsDir() { return nil } //paths <- path files = append(files, path) return nil }) fmt.Println("files list built") println(len(files)) /*perm := rand.Perm(len(files)) for _, v := range perm { paths <- files[v] }*/ }