Category Archives: Projects

tictactoe – very smart computer

Tictactoe – Very Smart Computer

I finished what I believe is the last iteration of my tictactoe project. I might do some stuff to update the visual element, but I think the AI is finally complete enough that it can’t be beaten.

I borrowed a lot from my friend’s solution but I made sure to rewrite this from scratch and understand mechanically how everything was working.

I tried a different way of storing the data and added a function that determines whether a move would result in a two-way win opportunity for the player. I considered moving on from this project at one point, but I couldn’t help but feel that if I couldn’t figure this out, how would I be able to do things that were more difficult?

I’m happy with the result and looking forward to moving on to the next project!

(function() {
	var board, turn;
	var counter = ['0', '1', '2']
	var boardElem = document.getElementById('board');
	
	var positions = counter.reduce(function(acc, r) {
	  return acc.concat(counter.map(function(c) { return r + c; }));
	}, []);

	var lines = counter.map(function(r) {
		return counter.map(function(c) {
			return r + c;
		})
	}).concat(['0', '1', '2'].map(function(r) {
		return ['0', '1', '2'].map(function(c) {
			return c + r;
		})
	})).concat([['00', '11', '22'],['20', '11', '02']])

	// Blank out a single cell on the shared board; hands the coordinate back
	// so the function doubles as a map()/forEach() callback.
	function emptyCell(cell) {
		board[cell] = '';
		return cell;
	}

	// Reset the shared board to nine empty cells, one per coordinate.
	// Fixes: `new Object()` replaced by an object literal, and the pointless
	// reassignment of the `positions` parameter removed.
	function newBoard(positions) {
		board = {};
		positions.forEach(emptyCell);
	}

	// Markup for one cell: a div whose id is the coordinate and whose text is
	// that cell's current mark ('', 'X', or 'O').
	function renderHTML(coord) {
		var pieces = ['<div id = "', coord, '">', board[coord], '</div>'];
		return pieces.join('');
	}

	// Redraw the whole grid: concatenate each cell's markup in board order and
	// swap it into the #board element. The parameter is unused but kept so
	// existing call sites remain unchanged.
	function renderBoard(b) {
		var html = '';
		positions.forEach(function(coord) {
			html += renderHTML(coord);
		});
		boardElem.innerHTML = html;
	}

	// Start a fresh game: empty board, human ('X') to move, grid redrawn.
	function reset() {
		newBoard(positions);
		turn = 'X';
		renderBoard(board);
	}

	// Record a move for the current player, redraw, then hand the turn to the
	// other side. Announces the result and restarts when the game is decided;
	// otherwise lets the computer respond when it is 'O' to play.
	function makeMove(move) {
		board[move] = turn;
		renderBoard(board);
		turn = turn === 'X' ? 'O' : 'X';

		if (gameOver()) {
			// `turn` has already flipped, so the winner is the opposite mark.
			alert(turn === 'X' ? 'YOU LOSE' : 'YOU WIN');
			reset();
			return;
		}
		if (turn === 'O') {
			computerMove();
		}
	}

	// Returns a predicate that tests whether the board cell at coordinate `c`
	// holds `value`. Fix: the original compared the whole `board` object to
	// `value` instead of indexing the cell, so it could never match.
	function check(value) {
		return function(c) {
			return board[c] === value;
		};
	}

	// True when every cell of the line is owned by a single player.
	function checkLines(line) {
		return ['X', 'O'].some(function(mark) {
			return line.every(check(mark));
		});
	}

	// True when any of the eight lines is completely owned by one player.
	function gameOver() {
		for (var i = 0; i < lines.length; i++) {
			if (checkLines(lines[i])) {
				return true;
			}
		}
		return false;
	}

	// Choose and play the computer's move, trying strategies strongest-first:
	// win now, block the player's win, take the centre, pre-empt a fork,
	// develop a line, then any free cell. No move left means a draw.
	function computerMove() {
		// `var` added: `move` was previously an accidental implicit global.
		var move = win() || block() || firstMove() || oneAhead() || setup() || anyMove();
		if (move) {
			makeMove(move);
		}
		else {
			alert('Draw');
			reset();
		}
		// Removed debug leftover `testp.innerHTML = oneAhead();` — it
		// referenced an element (`testp`) not defined in this file and re-ran
		// the fork search after the move had already been played.
	}

	// First empty cell in reading order, or undefined when the board is full.
	function anyMove() {
		for (var i = 0; i < positions.length; i++) {
			if (board[positions[i]] === '') {
				return positions[i];
			}
		}
	}

	// Take the centre square when it is still free; otherwise yields
	// undefined so the next strategy in computerMove's chain runs.
	function firstMove() {
		return board['11'] === '' ? '11' : undefined;
	}

	// True when the computer ('O') already owns two cells of this line and
	// the third is empty — i.e. 'O' can win on this move. Now returns an
	// explicit boolean (was true/undefined), consistent with checkBlock.
	function checkWinOpp(line) {
		var lineConfig = checkScenario(line, 'O');
		return lineConfig[0] === 2 && lineConfig[1] === 1;
	}

	// True when the human ('X') owns two cells of this line with the third
	// empty — an immediate win that must be blocked.
	function checkBlock(line) {
		var lineConfig = checkScenario(line, 'X');
		return lineConfig[0] === 2 && lineConfig[1] === 1;
	}

	// Builds a line predicate over a hypothetical board: true when 'X' owns
	// exactly two cells of the line and exactly one cell is empty (an
	// immediate win threat for 'X'). Fix: the original compared `tempBoard`
	// itself to 'X'/'' instead of indexing the cell `tempBoard[c]`.
	function checkOneAhead(tempBoard) {
		return function(line) {
			var myMoves = line.reduce(function(acc, c) { return tempBoard[c] === 'X' ? acc + 1 : acc; }, 0);
			var blankMoves = line.reduce(function(acc, c) { return tempBoard[c] === '' ? acc + 1 : acc; }, 0);
			return myMoves === 2 && blankMoves === 1;
		};
	}

	// True when the computer ('O') holds exactly one cell of the line and the
	// other two are empty — a line worth developing. Returns an explicit
	// boolean (was true/undefined), consistent with checkBlock.
	function checkClear(line) {
		var lineConfig = checkScenario(line, 'O');
		return lineConfig[0] === 1 && lineConfig[1] === 2;
	}

	// Count, for one line, how many cells hold `value` and how many are
	// empty; returns [valueCount, blankCount]. Fix: the original compared the
	// whole `board` object instead of the indexed cell `board[c]`, so both
	// counts were always zero.
	function checkScenario(line, value) {
		var myMoves = line.reduce(function(acc, c) { return board[c] === value ? acc + 1 : acc; }, 0);
		var blankMoves = line.reduce(function(acc, c) { return board[c] === '' ? acc + 1 : acc; }, 0);
		return [myMoves, blankMoves];
	}

	// If the computer can complete a line this turn, return the coordinate of
	// the empty cell in that line; otherwise false. Fix: the cell lookup used
	// `board` instead of `board[c]`, so the empty cell was never found.
	function win() {
		var line = lines.find(checkWinOpp);
		if (line !== undefined) {
			var linesData = line.map(function(c) { return board[c]; });
			return line[linesData.indexOf("")];
		}
		return false;
	}

	// If the human threatens to complete a line next turn, return the empty
	// cell of that line so the computer blocks it; otherwise false.
	// Fix: `board` -> `board[c]` in the cell lookup.
	function block() {
		var line = lines.find(checkBlock);
		if (line !== undefined) {
			var linesData = line.map(function(c) { return board[c]; });
			return line[linesData.indexOf("")];
		}
		return false;
	}

	// Pick a line the computer has started (one 'O', two blanks) and return
	// its first empty cell; otherwise false. Fix: `board` -> `board[c]` in
	// the cell lookup.
	function setup() {
		var line = lines.find(checkClear);
		if (line !== undefined) {
			var linesData = line.map(function(c) { return board[c]; });
			return line[linesData.indexOf("")];
		}
		return false;
	}

	// Map each value appearing more than once to the list of all its indices,
	// e.g. ['a','b','a'] -> { a: [0, 2] }. Defined non-enumerably so the
	// extension does not leak into for...in loops elsewhere. (Extending a
	// native prototype is still fragile; kept only for the call in oneAhead.)
	Object.defineProperty(Array.prototype, 'getDuplicates', {
		writable: true,
		configurable: true,
		value: function() {
			var duplicates = {};
			for (var i = 0; i < this.length; i++) {
				if (duplicates.hasOwnProperty(this[i])) {
					duplicates[this[i]].push(i);
				} else if (this.lastIndexOf(this[i]) !== i) {
					duplicates[this[i]] = [i];
				}
			}
			return duplicates;
		}
	});

	// Look for a cell where the human ('X') could create a fork (two
	// simultaneous win threats) and return it so the computer occupies that
	// square first; false when no such cell exists. Fixes: the cell lookup
	// used `board` instead of `board[c]`, and thinkAhead was called twice
	// per candidate.
	function oneAhead() {
		var line = lines.find(checkClear);
		if (line === undefined) {
			return false;
		}
		var linesData = line.map(function(c) { return board[c]; });
		// checkClear guarantees exactly two blanks in this line; try the
		// second one as the human's hypothetical fork move.
		var blankCells = linesData.getDuplicates();
		var xMove = line[blankCells[''][1]];
		// thinkAhead already returns xMove-or-false, so pass it straight up.
		return thinkAhead(xMove);
	}

	// Deep-copy a plain object/array graph; primitives and null are returned
	// as-is. Fixes: arrays now clone to arrays explicitly (instead of relying
	// on `obj.constructor()`), and only own properties are copied — the
	// original walked inherited keys as well.
	function cloneObject(obj) {
		if (obj === null || typeof obj !== 'object') {
			return obj;
		}
		var temp = Array.isArray(obj) ? [] : {};
		for (var key in obj) {
			if (Object.prototype.hasOwnProperty.call(obj, key)) {
				temp[key] = cloneObject(obj[key]);
			}
		}
		return temp;
	}

	// Would playing `xMove` as 'X' give 'X' more than one immediate win
	// threat (a fork)? Returns xMove when so, else false. The board is
	// cloned so real game state is never touched. Fix: removed a leftover
	// `debugger;` statement.
	function thinkAhead(xMove) {
		var tempBoard = cloneObject(board);
		tempBoard[xMove] = 'X';
		var numWinLines = lines.filter(checkOneAhead(tempBoard)).length;
		return numWinLines > 1 ? xMove : false;
	}

	// NOTE(review): duplicate declaration of win(), identical to the earlier
	// one — function declarations hoist, so this later copy is the one that
	// actually runs. Fixed with the same `board` -> `board[c]` cell-lookup
	// repair; the duplicate itself should eventually be deleted.
	function win() {
		var line = lines.find(checkWinOpp);
		if (line !== undefined) {
			var linesData = line.map(function(c) { return board[c]; });
			return line[linesData.indexOf("")];
		}
		return false;
	}

	// Delegated click handler for the board: play the clicked cell only when
	// it is still free.
	function handleClick(e) {
		var move = e.target.id;
		if (board[move] !== '') {
			return;
		}
		makeMove(move);
	}

	// Attach a single delegated click listener on the board container.
	function listen() {
		boardElem.addEventListener('click', handleClick);
	}
	
	// Bootstrap: wire the handler, then set up and draw the first game.
	listen();
	reset();
})();
;

Tictactoe – 2 player

I’m currently working through the second Front End section via Bento.io which consists of JavaScript. I find that if I don’t apply what I learn, I quickly forget so I decided to tackle a project that would make me apply what I’ve learned in HTML, CSS, and JavaScript.

My first iteration was a simple 2 player tictactoe game. It’s pretty simple and doesn’t do much aside from placing X’s or O’s.

The HTML consists of nine div elements each containing a paragraph element. I included an onclick function for the placement of moves. The middle row and middle column contain a class for the grid lines.

<html>

<head>
  <link rel="stylesheet" href="stylesheet.css" />
</head>

<body>
  <!-- 3x3 tic-tac-toe grid: nine cells in row-major order. Each <p> passes
       itself to myFunction (javascript.js), which stamps the X/O mark and
       sets its color. The mid-row / mid-column classes draw the grid lines
       via borders in stylesheet.css. -->
  <div>
    <p onclick="myFunction(this, 'black')">1</p>
  </div>
  <div class = "mid-column" >
    <p onclick="myFunction(this, 'black')">2</p>
  </div>
  <div>
    <p onclick="myFunction(this, 'black')">3</p>
  </div>
  <div class = "mid-row">
    <p onclick="myFunction(this, 'black')">4</p>
  </div>
  <div class = "mid-row mid-column">
    <p onclick="myFunction(this, 'black')">5</p>
  </div>
  <div class = "mid-row">
    <p onclick="myFunction(this, 'black')">6</p>
  </div>
  <div>
    <p onclick="myFunction(this, 'black')">7</p>
  </div>
  <div class = "mid-column">
    <p onclick="myFunction(this, 'black')">8</p>
  </div>
  <div>
    <p onclick="myFunction(this, 'black')">9</p>
  </div>

  <!-- Loaded last so the cells exist before the handler is defined. -->
  <script src="javascript.js" type="text/javascript"></script>
   
  </body>
</html>

I’ve formatted the divs to make the 3×3 grid shape. The body element is formatted to keep the grid at a 600px width and centers it as well. Also, I created some classes to make the lines.

/* Cell mark: large centered letter. The rgba alpha of 0 keeps the seed
   numbers invisible until the click handler assigns an opaque color. */
p {
	text-align: center;
	font-size: 150px;
	margin-top: 5%;
  font-family: Helvetica;
  font-weight: Bold;
  Color: rgba(200, 200, 200, 0);
}

/* One grid cell; inline-block with a small negative margin butts the nine
   squares up against each other to form the 3x3 grid. */
div {
	height: 180px;
	width: 180px;
	margin: 0px -2px;
	display: inline-block;
	position: relative;
}

/* Horizontal grid lines: borders on the middle row's cells. */
.mid-row {
  border-bottom: 20px solid black;
  border-top:  20px solid black;
}

/* Vertical grid lines: borders on the middle column's cells. */
.mid-column {
  border-right: 20px solid black;
  border-left:  20px solid black;
}

/* Keep the grid a fixed 600px wide and centered on the page. */
body {
	background-color: White;
	max-width: 600px;
	min-width: 600px;
	text-align: center;
	margin: 20px auto;
}

The Javascript code is pretty simple. It just checks whether the spot is free and toggles the text for the paragraph element into X’s or O’s.

 // Mark of the player who moved last; it is flipped before each placement,
// so 'X' (the opposite of the initial "O") goes first.
var move = "O";

// Click handler for one grid cell: if the cell is still unmarked, flip the
// turn and stamp the cell with the current mark in the given color.
// elmnt: the clicked <p> element; clr: CSS color for the mark.
// Fix: strict (in)equality `!==` replaces loose `!=`.
function myFunction(elmnt, clr) {
	if (elmnt.innerHTML !== "X" && elmnt.innerHTML !== "O") {
		move = move === "O" ? "X" : "O";
		elmnt.innerHTML = move;
		elmnt.style.color = clr;
	}
}

dataviz part 6 – Increasing scope of data

I’ve updated the code to grab more QB information so I don’t have to manually input the QB names.

# python3
# footballStatScraper_v7.py - pull stats into a list

import json, bs4, requests, csv
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# Shared scraping state: a single Firefox session reused for every QB.
browser = webdriver.Firefox()
baseUrl = 'http://www.pro-football-reference.com/'
masterData = []  # accumulates one dict per game row, across all QBs

def getQBList():
    """Scrape the QB index page and return {player name: profile URL path}."""
    qbListUrl = 'http://www.pro-football-reference.com/players/qbindex.htm'
    soup = bs4.BeautifulSoup(requests.get(qbListUrl).text, "lxml")
    elems = soup.select('b > a')
    # One pass over the anchors instead of zipping two parallel traversals
    # of the same element list.
    return {a.text: a.get('href') for a in elems}
    
def goToQBGameLogs(qb, qbUrl):
    # Open the QB's profile page and click through to the gamelog view.
    # `qb` is unused here but kept for symmetry with the call site.
    browser.get(baseUrl + qbUrl)    
    gameLogsElem = browser.find_element_by_link_text('Gamelogs [+]')
    gameLogsElem.click()
    # NOTE(review): implicitly_wait sets the session-wide element-poll
    # timeout; placing it after the click only helps later lookups — confirm
    # this ordering is intentional.
    browser.implicitly_wait(10)
    
def getCsv(seasonType, qb):
    # Toggle the stats table into CSV text, then split it into a list of
    # rows of fields. `qb` is unused but kept for call-site compatibility.
    browser.find_element_by_xpath("//div[@tabindex='2']/span[contains(text(),'CSV')]").click()
    statList = FormatcsvText(browser.find_element_by_id(seasonType).text.split('\n'))
    return statList

def goToPlayoffPage():
    # The playoff gamelog lives at the regular-season URL + '/post'.
    browser.get(browser.current_url + '/post')
    
def FormatcsvText(csvTextList):
    """Split each CSV text line into its list of fields."""
    split_rows = []
    for row in csvTextList:
        split_rows.append(row.split(','))
    return split_rows

def addToDataList(statList, qbName, seasonType):
    """Append one dict per game row of statList to the global masterData.

    Rows 0-1 are the two split header rows and are skipped; later rows are
    kept only when their first field is a game number (this filters repeated
    header rows and summary rows). Fix: the dict is now built only for rows
    that are actually kept, instead of on every iteration.
    """
    fieldNames = defineFieldNames(statList)
    for rowIdx, record in enumerate(statList):
        if rowIdx > 1 and record[0].isnumeric():
            dataDict = {'Player': qbName, 'seasonType': seasonType}
            dataDict.update(dict(zip(fieldNames, record)))
            masterData.append(dataDict)

def defineFieldNames(statList):
    """Merge the two header rows into single field names; blanks -> 'Where'."""
    merged = [top + bottom for top, bottom in zip(statList[0], statList[1])]
    return [name if name != '' else 'Where' for name in merged]

def saveAsJson():
    # Persist the scraped rows so a crash later doesn't lose hours of work.
    with open('data.json', 'w') as outfile:
        json.dump(masterData, outfile)

qbDict = getQBList()

# Scrape regular-season ('R') then playoff ('P') gamelogs for every QB.
for qb, url in qbDict.items():
    try:
        goToQBGameLogs(qb, url)
        addToDataList(getCsv('csv_stats', qb), qb, 'R')
        goToPlayoffPage()
        addToDataList(getCsv('csv_stats_playoffs', qb), qb, 'P')
    except NoSuchElementException:
        # Some players have no gamelog or playoff table; skip them.
        continue
    

saveAsJson()
print('Done')

I also wrote something that will take the data and write it to a csv file. I separated the two functions because the scraping part takes a really long time and I didn’t want the program to crash before saving the data somewhere. I think there are ways to learn how to scrape more efficiently, but for now, I think I want to move onto the next part of my project.

# python3
# convertToCsv.py - save json format to csv

import csv, json

def savetoCSV(data):
    """Write a list of dicts to qbData.csv, one column per distinct key.

    Rows may have differing keys, so the header is the union of all keys.
    Fix: the field names are sorted — set iteration order is arbitrary, so
    the previous version produced a different column order on every run.
    """
    with open('qbData.csv', 'w', newline='') as csvfile:
        fieldnames = sorted(set().union(*data))
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

# Load the rows the scraper saved (saveAsJson) and convert them to CSV.
with open('data.json') as data_file:    
    masterData = json.load(data_file)
savetoCSV(masterData)

dataviz part 5 – more code cleaning

I received some more feedback from my friend after my last iteration. He introduced me to “List Comprehensions” that make writing for loops much cleaner. There was also a section where I was previously entering in the playerName and the seasonType an unnecessary number of times. I was also setting dictionary keys and values manually, but there are built in functions that make that much easier.

# python3
# footballStatScraper_v5.py - pull stats into a list

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

qbList = ['Cam Newton']  # QBs to scrape; extend as needed
browser = webdriver.Firefox()
browser.get('http://www.pro-football-reference.com/')
masterData = []  # accumulates one dict per game row

def qbSearch(qbName):
    # Type the QB's name into the site search box and submit it.
    searchElem = browser.find_element_by_name('search')
    searchElem.send_keys(qbName)
    searchElem.submit()

def testForQBProfile():
    try:
        # The search sometimes lands directly on the profile page...
        goToGameLogs()
    except NoSuchElementException:
        # ...otherwise we got a results list: click the QB entry first.
        browser.find_element_by_xpath("//div[@class='search-page-active']/div[contains(.,'QB')]//a").click()
        goToGameLogs()

def goToGameLogs():
    # Open the gamelog view. NOTE(review): implicitly_wait sets the
    # session-wide element-poll timeout for subsequent lookups.
    gameLogsElem = browser.find_element_by_link_text('Gamelogs [+]')
    gameLogsElem.click()
    browser.implicitly_wait(10)

def getCsv(seasonType):
    # Toggle the stats table into CSV text and split it into rows of fields.
    browser.find_element_by_xpath("//div[@tabindex='2']/span[contains(text(),'CSV')]").click()
    statList = FormatcsvText(browser.find_element_by_id(seasonType).text.split('\n'))
    return statList

def goToPlayoffPage():
    # The playoff gamelog lives at the regular-season URL + '/post'.
    browser.get(browser.current_url + '/post')
    
def FormatcsvText(csvTextList):
    """Split each CSV text line into its list of fields."""
    rows = []
    for line in csvTextList:
        rows.append(line.split(','))
    return rows

def addToDataList(statList, qbName, seasonType):
    """Append one dict per game row of statList to the global masterData.

    Rows 0-1 are the two split header rows; later rows are kept only when
    their first field is a game number. Fix: the dict is now built only for
    rows that are actually kept, instead of on every iteration.
    """
    fieldNames = defineFieldNames(statList)
    for rowIdx, record in enumerate(statList):
        if rowIdx > 1 and record[0].isnumeric():
            dataDict = {'Player': qbName, 'seasonType': seasonType}
            dataDict.update(dict(zip(fieldNames, record)))
            masterData.append(dataDict)

def defineFieldNames(statList):
    """Merge the two header rows into field names; blanks become 'Where'."""
    combined = [first + second for first, second in zip(statList[0], statList[1])]
    return ['Where' if name == '' else name for name in combined]
    
# Scrape regular-season ('R') then playoff ('P') gamelogs for each QB.
for qb in qbList:
   qbSearch(qb)
   testForQBProfile()
   addToDataList(getCsv('csv_stats'), qb, 'R')
   goToPlayoffPage()
   addToDataList(getCsv('csv_stats_playoffs'), qb, 'P')

dataviz part 4 – Cleaning up the code

I met with a programming buddy of mine and got him to take a look at my code. He gave me some suggestions for best practices. Organizing different actions into functions, name them clearly so they can be read without comments, and reduce redundancies. I’m also compiling the data into a list of dictionaries now which allows for more control in data manipulation.

# python3
# footballStatScraper_v4.py - pull stats into a list

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

qbList = ['Cam Newton']  # QBs to scrape; extend as needed
browser = webdriver.Firefox()
browser.get('http://www.pro-football-reference.com/')
masterData = []  # accumulates one dict per game row

def qbSearch(qbName):
    # Type the QB's name into the site search box and submit it.
    searchElem = browser.find_element_by_name('search')
    searchElem.send_keys(qbName)
    searchElem.submit()

def testForQBProfile():
    try:
        # The search sometimes lands directly on the profile page...
        goToGameLogs()
    except NoSuchElementException:
        # ...otherwise we got a results list: click the QB entry first.
        browser.find_element_by_xpath("//div[@class='search-page-active']/div[contains(.,'QB')]//a").click()
        goToGameLogs()

def goToGameLogs():
    # Open the gamelog view. NOTE(review): implicitly_wait sets the
    # session-wide element-poll timeout for subsequent lookups.
    gameLogsElem = browser.find_element_by_link_text('Gamelogs [+]')
    gameLogsElem.click()
    browser.implicitly_wait(10)

def getCsv(seasonType):
    # Toggle the stats table into CSV text and split it into rows of fields.
    browser.find_element_by_xpath("//div[@tabindex='2']/span[contains(text(),'CSV')]").click()
    statList = FormatcsvText(browser.find_element_by_id(seasonType).text.split('\n'))
    return statList

def goToPlayoffPage():
    # The playoff gamelog lives at the regular-season URL + '/post'.
    browser.get(browser.current_url + '/post')
    
def FormatcsvText(csvTextList):
    """Split each CSV text line into its list of fields."""
    return [row.split(",") for row in csvTextList]

def addToDataList(statList, qbName, seasonType):
    """Append one dict per game row of statList to the global masterData.

    Rows 0 and 1 are the two split header rows; they are merged into field
    names. Data rows are kept only when the first field is a game number.
    Fixes: the header rows are merged directly instead of joining into a
    comma-separated string and re-splitting (which broke if a field
    contained ', '), and the constant Player/seasonType keys are set once
    per row instead of once per cell.
    """
    if len(statList) < 2:
        return
    fieldNamesList = defineWhereField(['%s%s' % t for t in zip(statList[0], statList[1])])
    for row in range(2, len(statList)):
        if statList[row][0].isnumeric():
            dataDict = {'Player': qbName, 'seasonType': seasonType}
            for cell in range(0, len(fieldNamesList)):
                dataDict[fieldNamesList[cell]] = statList[row][cell]
            masterData.append(dataDict)

def defineWhereField(fieldList):
    """Replace blank field names with 'Where' (the game-location column has
    no header in the source data). Mutates fieldList in place and returns it.

    Fixes: uses enumerate instead of an index loop and drops the useless
    `else: continue` branch.
    """
    for i, field in enumerate(fieldList):
        if field == '':
            fieldList[i] = 'Where'
    return fieldList
    
# Scrape regular-season ('R') then playoff ('P') gamelogs for each QB.
for qb in qbList:
   qbSearch(qb)
   testForQBProfile()
   addToDataList(getCsv('csv_stats'), qb, 'R')
   goToPlayoffPage()
   addToDataList(getCsv('csv_stats_playoffs'), qb, 'P')

Online Course v2

My first landing page didn’t work out too well. I think my second page worked a bit better.

I ran it with the same budget $20-$25 for a week on both Facebook and google. For Facebook, I changed the targeting from financial analysts to 16-23 year old college graduates who studied Finance or Accounting. For the same $20, my clicks went from 18 to 42. I kept the google campaign the same and the clicks went up marginally from 100 to 118.

I changed the wording in the ads slightly for beginners as well. Looking at this now, I should have used similar copy. I mentioned free-mini course in the facebook ad, but not the google ad.

fbad

googlead

In the first experiment, I included the free course on the landing page with a signup for more information (no one signed up). I changed it so that the viewer had to include their email to receive the free mini-course. Because of this, I can’t compare this aspect of the experiment.

In the new version, I had 15 people put their e-mail address down. The WP Email Capture plugin auto responds to a signup with a request for authentication. I included in this e-mail a link to the free course and a message that said to click the authentication link for more courses like this. I had one person click the authentication link. In the future, I would create identical landing pages for both facebook and google ads so I can see how the signup rates differ by the viewers coming from each site.

landingpage2

For now, I’m putting this project on hold. Mostly because I’ve been finding myself really interested in other projects. I get the sense that the market for learning excel through shortcut keys might be a bit too niche. It might be better to take a broader approach and focus on basic excel tutorials.

Scott Adams mentioned that things that turn out well usually start out well and none of his ideas that started out badly ever ended up working out. The key is to learn from what you’re doing. I learned how to make a landing page with email signups. I’ve learned how to make videos with my computer. I’ve learned that I need to think about how to experiment and measure changes that are made. I’ve gotten a beginner’s introduction to facebook and google ads. I think this will help me test and experiment with ideas in the future.

Drew Brees Dataviz part 3 – Some New Charts and Automating more of the Code

I figured out how to scrape the pro-football-reference pages. Below is code that takes a list of QBs, downloads their stats, and consolidates the files into one folder.

# python3
# footballStatScraper.py - pull stats for specified qb's

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import os, shutil

url = 'http://www.pro-football-reference.com/'

qbList = ['Tom Brady', 'Joe Montana', 'Peyton Manning', 'Brett Favre', 'John Elway', 'Johnny Unitas', 'Dan Marino', 'Steve Young']
downloadPath = 'C:\\Users\\Yujin\\Desktop\\Work\\footballProject\\data\\dataDownload\\'
downloadTo = 'C:\\Users\\Yujin\\Desktop\\Work\\footballProject\\data\\csv\\'
# open web browser

# Configure Firefox to auto-save CSVs into downloadPath without a dialog.
fp = webdriver.FirefoxProfile()
fp.set_preference("browser.download.folderList", 2)
fp.set_preference("browser.download.manager.showWhenStarting", False)
fp.set_preference("browser.download.dir", downloadPath)
fp.set_preference('browser.helperApps.neverAsk.saveToDisk', "text/csv")
browser = webdriver.Firefox(firefox_profile=fp)
browser.get(url)

# click search box and enter QB name
for qbName in qbList:
    searchElem = browser.find_element_by_name('search')
    searchElem.send_keys(qbName)
    searchElem.submit()

    try:
        # The search sometimes lands directly on the profile page...
        gameLogsElem = browser.find_element_by_link_text('Gamelogs [+]')
    except NoSuchElementException:
        # ...otherwise click the QB entry in the results list first.
        qbElem = browser.find_element_by_xpath("//div[@class='search-page-active']/div[contains(.,'QB')]//a")
        qbElem.click()
        gameLogsElem = browser.find_element_by_link_text('Gamelogs [+]')
        
    gameLogsElem.click()

    # Go to Gamelogs and click Export for Regular Season
    browser.implicitly_wait(10)
    exportElem = browser.find_element_by_xpath("//div[@tabindex='2']/span[6]")
    exportElem.click()

    # move and rename regular season files
    # NOTE(review): assumes the download dir holds exactly one, fully
    # downloaded file at this point — there is no wait for the download to
    # finish, so this can race or pick up a partial file.
    fileName = os.listdir(downloadPath)
    qbNameSplit = qbName.split()
    newFileName = qbNameSplit[0][0] + qbNameSplit[1] + 'R.csv'
    print('From: ' + downloadPath + fileName[0])
    print('To: ' + downloadTo + newFileName)
    shutil.move(downloadPath + fileName[0], downloadTo + newFileName)

    # download play off season stats
    # The playoff page is the gamelog URL with its trailing '/' replaced by
    # 'post'.
    gameLogsElem = browser.find_element_by_link_text('Gamelogs [+]')
    playOffUrl = gameLogsElem.get_attribute('href')[0:len(gameLogsElem.get_attribute('href'))-1] + 'post'
    browser.get(playOffUrl)    
    exportElem = browser.find_element_by_xpath("//div[@tabindex='2']/span[6]")
    exportElem.click()

    # Move playoff season stats
    # Files are renamed to e.g. 'TBradyR.csv' / 'TBradyP.csv' (first initial
    # + last name + season type).
    fileName = os.listdir(downloadPath)
    newFileName = qbNameSplit[0][0] + qbNameSplit[1] + 'P.csv'
    print('From: ' + downloadPath + fileName[0])
    print('To: ' + downloadTo + newFileName)
    shutil.move(downloadPath + fileName[0], downloadTo + newFileName)

This next section takes all the csv files and combines them into one excel file.

# python3
# datacompiler.py - clean data and compile

import openpyxl, os, csv
from openpyxl.cell import get_column_letter, column_index_from_string
downloadTo = 'C:\\Users\\Yujin\\Desktop\\Work\\footballProject\\data\\csv\\'
dataPerQB = {}


# One pass per downloaded CSV: merge the two header rows, keep only numeric
# data rows, and write each file into its own sheet of masterData.xlsx.
for fileName in os.listdir(downloadTo):

    # open csv file
    # NOTE(review): csvFileObj is never closed — a with-block would be safer.
    csvFileObj = open(downloadTo + fileName)
    readerObj = csv.reader(csvFileObj)

    # look through second row and if not blank, combine with third row
    rowData = []
    headerRow = []

    for row in readerObj:
        if readerObj.line_num == 1:
            continue
        elif readerObj.line_num == 2:
            for cell in row:
                headerRow.append(cell)
        elif readerObj.line_num == 3:
            for cell in range(0,len(row)):
                headerRow[cell] = headerRow[cell] + row[cell]
                # NOTE(review): this hard-codes column 7 as the blank
                # home/away header and re-assigns it on every loop pass —
                # it only needs to run once, outside the loop.
                headerRow[6] = 'Where'
            rowData.append(headerRow)
        else:  
            if row[0].isnumeric() == True:
                rowData.append(row)
            else:
                continue

    # save rows into excel
    # NOTE(review): the workbook is re-loaded and re-saved for every file;
    # loading once before the loop would be much faster.
    wb = openpyxl.load_workbook('C:\\Users\\Yujin\\Desktop\\Work\\footballProject\\data\\masterData.xlsx')
    wb.create_sheet(0)
    wbSheet = wb.get_sheet_by_name('Sheet')
    wbSheet.title = fileName[0:len(fileName)-4]
    
    for row in range(0,len(rowData)):
        for cell in range(0,len(rowData[row])):
            wbSheet[get_column_letter(cell+1) + str(row + 1)] = rowData[row][cell]

    wb.save('C:\\Users\\Yujin\\Desktop\\Work\\footballProject\\data\\masterData.xlsx')

This last section creates a table that consolidates all the data.

# python3
# masterData.py - Consolidate data into one table

import openpyxl, os, csv
from openpyxl.cell import get_column_letter, column_index_from_string
sheetNames = []
recordsPerQB = {}  # per-sheet game count, read from the last 'Rk' value
fieldNames = ['uniqueID', 'Rk', 'player']  # fixed leading columns


wb = openpyxl.load_workbook('C:\\Users\\Yujin\\Desktop\\Work\\footballProject\\data\\masterData.xlsx')
wb.remove_sheet(wb.get_sheet_by_name('Sheet1'))
sheetNames = wb.get_sheet_names()

# get number of records and all possible field titles
# Sheets can have different columns, so fieldNames becomes the union of all
# header titles across sheets.
for sheet in sheetNames:
    activeSheet = wb.get_sheet_by_name(sheet)
    recordsPerQB[sheet] = activeSheet['A' + str(activeSheet.max_row)].value

    for column in range(0,activeSheet.max_column):
        if activeSheet[get_column_letter(column + 1) + str('1')].value in fieldNames:
            continue
        else:
            fieldNames.append(activeSheet[get_column_letter(column + 1) + str('1')].value)


# add fields to master data
wb.create_sheet(index = 0)
wbSheet = wb.get_sheet_by_name('Sheet')
wbSheet.title = 'data'


for field in range(0,len(fieldNames)):
    wbSheet[get_column_letter(field + 1) + str('1')].value = fieldNames[field]
    

# create row for each entry

# loop through qbMetric dictionary add Rk in Column B and Qb name in Column C
rowStart = 2
for key in recordsPerQB:
    print('Copying Stats for ' + key)
    for entry in range(0,int(recordsPerQB[key])):
        if rowStart == 2:
            wbSheet['A'+ str(rowStart)].value = 1
        else:
            wbSheet['A'+ str(rowStart)].value = '=A' + str(rowStart - 1) + '+ 1'
        wbSheet['B'+ str(rowStart)].value = str(entry + 1)
        wbSheet['C'+ str(rowStart)].value = key

        # Each stat cell becomes an Excel formula: VLOOKUP the game number
        # (column B) in the source sheet named in column C, with MATCH
        # finding the right column by header; IFERROR blanks missing stats.
        for formula in range(4, len(fieldNames)+1):

            matchPart = 'MATCH('+ get_column_letter(formula) + '$1,INDIRECT("\'"&$C' + str(rowStart) + '&"\'!1:1\"), FALSE)'
            vlookupArray = 'INDIRECT("\'"&$C' + str(rowStart) + '&"\'!$A$1:$' + get_column_letter(wb.get_sheet_by_name(key).max_column) + '$' + str(wb.get_sheet_by_name(key).max_row) + '")'
            vLookup = 'VLOOKUP($B' + str(rowStart) + ', ' + vlookupArray + ',' +  matchPart + ', FALSE)'
            ifError = '=IFERROR(' + vLookup + ', "")'
            wbSheet[get_column_letter(formula)+ str(rowStart)].value = ifError
            
        rowStart += 1


print('Done')
wb.save('C:\\Users\\Yujin\\Desktop\\Work\\footballProject\\data\\masterData.xlsx')

TODO: See if I can improve the scraping section. Also, instead of consolidating into excel and using formulas, see if I can manage everything through python code.

Drew Brees Dataviz part 2 – Building Viz on Tableau

I first looked at total TDs (rushing and passing). I assumed TDs are an important metric for QB’s. It’s easy to make complicated charts, but it’s usually more effective to keep charts simple. I made a calculated field “if [Player] = “Dbrees” then 1 else 0 END” for Drew Brees and moved it to the color card to set Drew Brees apart from everyone else.

TDchart

Next, I wanted to look at the stat on a per game basis to try to normalize for number of seasons played. I made a new calculated field for TD/Games = sum([Passing & Rushing TD’s]) / COUNTD([Games]). Games is a unique identifier for each row of data.

tdpergamechart

Then I wanted to look at interceptions per game (sum([Passing Int]) / COUNTD([Games])). For each chart, I would copy the previous tab so I could keep the formatting and the Drew brees color marker.

intpergame

I thought seeing a TD to int ratio (sum([Passing TD]) / sum([Passing Int])) might be interesting to look at. I’m not a hardcore football stat fan, so I don’t know if people look at this, but I wanted some sort of measure for risk-taking or maybe precision?

tdtoint

I used similar methodologies for total yards, yards per game, and completions. For completion % I had to make a new field (sum([Passing Cmp]) / sum([Passing Att])).

For Win-Loss info, I had to make a new field because the results only came in a “W 22-24” format. I made a field called Win-Loss = if left([Result],1) = ‘W’ then ‘W’ else ‘L’ END. I set this filter for ‘W’ for the wins chart and ‘L’ for the Losses chart.

wins

The Win % was calculated as sum([winCount]) / countd([Games]) with winCount = if [Wins-Losses] = ‘W’ then 1 else 0 END. For Playoff games, I just used the filter for playoffs I had created with the master dataset.

I didn’t have a great way to extract the superbowl wins here so I just manually looked up this section and uploaded a new excel document to make the last chart.

superbowl

I put together all the charts in story mode. I created a dashboard tab for the first slide and uploaded the rankings as an image. I then organized the charts and put in some commentary. Finally, I uploaded this finished story to the Tableau Public server and posted it on my page.

story

So that’s my first dataviz project. Next step, I’d like to streamline and automate some of the data crawling and cleaning aspects. Then I’d like to do some more analysis and see if I can find anything else interesting. Thanks for following along!

Drew Brees Dataviz part 1 – Getting the Data

Drew Brees Underrated?

I was looking through some football stats and noticed somewhere Drew Brees had some really impressive stats, but never seemed to be mentioned in the conversation for “All-time Top QB’s”. I decided to do some digging on the topic.

I looked up best QBs of all time and recorded the rankings from the top 5 articles that appeared on Google

Top rankings

I tried to create a webscraper on Python. I only started learning Python/Programming a few weeks ago so I’m pretty new to all this. I wanted to create a program that would go through and download the gamelog data for each QB on my list.

football Scraper

I had learned some simple scraping, but I wasn’t able to get the element for the “Export” button. I think I might need to learn some Javascript? I decided I wanted to go ahead and do some analysis so I manually downloaded some of the stats for my target QBs. I’d like to come back to this part later so I can automatically grab more data for other QBs.

exportdata

The raw data for each QB didn’t come in an easily usable format. The fields were on separate rows. I needed to combine the rows to make a single row for field names. I then highlighted the data and sorted the “Rk” field and deleted everything else.

rawData

I wanted to combine all the data into one table. I started by moving all the sheets into one excel file and renaming the sheet based on the QB name and whether the data was from playoffs or regular season e.g. “SyoungR, SyoungP, etc”. This is another area that I think I could automate and want to look into. Each sheet had the games numbered. I created a formula that would find the max game number for each QB for playoff and regular seasons.

numberofentries

Next, I created a python program that would create the number of lines I wanted for each QB based on number of playoff and regular season games.

dataRowCreator

The output below could then be used to consolidate all the sheets.

rowcreateroutput

I found QB’s didn’t have the same field titles. So I created a formula that would find the right column and pull based on the tab name.

masterdataformula

This is the combined dataset.

stats_v3