My collections.: 2013

Friday, October 11, 2013

Valid Number

Borrow the solution from here: Use DFA to solve the problem.
http://discuss.leetcode.com/questions/241/valid-number

class Solution {
public:
    enum token {
        space,
        digit,
        sign,
        dot,
        exp,
        invalid
    };
    
    bool isNumber(const char *s) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        int table[][6] = {
            {0, 1, 2, 3, -1, -1},
            {8, 1, -1, 4, 5, -1}, 
            {-1, 1, -1, 3, -1, -1}, 
            {-1, 4, -1, -1, -1, -1},
            {8, 4, -1, -1, 5, -1},
            {-1, 7, 6, -1, -1, -1},
            {-1, 7, -1, -1, -1, -1},
            {8, 7, -1, -1, -1, -1},
            {8, -1, -1, -1, -1, -1}
        };
        token curr_token;
        int curr_state = 0;
        while (*s != '\0') {
            curr_token = invalid;
            if (isdigit (*s))
                curr_token = digit;
            if (isspace (*s))
                curr_token = space;
            if (*s=='+' || *s=='-')
                curr_token = sign;
            if (*s == '.')
                curr_token = dot;
            if (*s=='e' || *s=='E')
                curr_token = exp;
            curr_state = table[curr_state][curr_token];
            if (curr_state == -1)
                return false;
            s++;
        }
        return (curr_state==1 || curr_state==4 || curr_state==7 || curr_state==8);
    }
};

Saturday, October 5, 2013

Implement strStr

class Solution {
public:
    char *strStr(char *haystack, char *needle) {
        // Note: The Solution object is instantiated only once and is reused by each test case.
        int len_h = strlen(haystack);
        int len_n = strlen(needle);
        if (len_h < len_n)
            return NULL;
        if (len_n == 0)
            return haystack;
        
        char *p_h = haystack, *p_n = needle;
        while (p_h <= haystack+len_h-len_n) {
            if (*p_h != *p_n) {
                p_h++;
            }
            else {
                int i;
                for (i = 0; i < len_n; i++) {
                    if (*(p_h+i) != *(p_n+i))
                        break;
                }
                if (i == len_n)
                    return p_h;
                else
                    p_h++;
            }
        }
        return NULL;
    }
};

Tuesday, October 1, 2013

Install numpy and scipy on Mac

Found a useful link about how to install numpy and scipy on mac.

https://github.com/fonnesbeck/ScipySuperpack

Tuesday, September 17, 2013

The image is represented in a two-dimension array. Every pixel of the image can only to assigned to either 0 or 1. If a pixel is 0, just leave it there. If a pixel is 1, we check its four neighbors. If all its four neighbors are 1, leave it there. Otherwise, erode it to 0.

Example:

0 1 1 1 0 0 0 1 0 0

1 1 1 0 0 --> 0 1 0 0 0

0 1 0 1 1 0 0 0 0 0

Solution:

The difficult part is that every pixel can only be assigned to either 0 or 1. Otherwise, we can assign the 1s which need erosion first to some other value in the first scan. In the second scan, we can change this value to 0.

So here we use to arrays to store the row index and column index for the pixel need erosion.
The problem with this method is that we use to much memory to store the row index and column index. One way to optimize is that we only store the index for two rows. Because the erosion of row 0 will not affect the check for row 2.

bool check (int x, int y, int image[H][M]) {
    if (x>0 && image[x-1][y]==0)
        return true;
    if (x<H-1 && image[x+1][y]==0)
        return true;
    if (y>0 && image[x][y-1]==0)
        return true;
    if (y<W-1 && image[x][y+1]==0)
        return true;
    return false;
}

void erosion_part (vector<int> &row, vector<int> &col, int image[H][M]) {
    for (int i = 0; i < row.size(); i++)
        image[row[i]][col[i]] = 0;
}

void erosion (int image[H][W]) {
    vector<int> row, col;
    for (int i = 0; i < H; i++) {
        for (int j = 0; j < W; j++) {
            if (check(i, j, image)) {
                row.push_back(i);
                col.push_back(j);
            }
        }
    }
    erosion_part (row, col, image);
}

The problem with this method is that we use to much memory to store the row index and column index. One way to optimize is that we only store the index for two rows. Because the erosion of row 0 will not affect the check for row 2.

void erosion_part (vector<int> &row, vector<int> &col, int image[H][M], int end) {
    while (!row.empty()) {
        if (row.front() > end)
            break;
        image[row.front()][col.front()] = 0;
        row.pop_front();
        col.pop_front();
    }
}

void erosion (int image[H][W]) {
    list<int> row, col;
    int start = 0;
    for (int i = 0; i < H; i++) {
        if (i-start >= 2) {
            erosion_part (row, col, image, start);
            row.clear();
            col.clear();
            start++;
        }
        for (int j = 0; j < W; j++) {
            if (check(i, j, image)) {
                row.push_back(i);
                col.push_back(j);
            }
        }
    }
    erosion_part (row, col, H-1);
}

Longest common prefix

Given an array of strings, find the longest common prefix.
Example:
["abc", "a", "ab"] --> "a"
["abcd", "ab", "abc] --> "ab"

Solution:

string lcs (vector<string> array) {
    if (array.empty())
        return "";
    int pos = 0;
    string result = "";
    for (int i = 0; i < array[0].size(); i++) {
        char target = array[0][i];
        for (int j = 1; j < array.size(); j++) {
            if (i >= array[j].size() || array[j][i] != target)
            //a small trick here is that the length of the first string may be longer that some of other 
            //strings in the array. We need to test whether we have reached the end of other strings
                return result;
            else
                result += array[0][i];
        }
    }
    return result;
}

Build a tree from a xml file

Give a xml file like this,
<a>
<b>Text<\b>
<c>
<d>Computer Science<\d>
<d>Linear Algebra<\d>
<\c>
<e> Hello world <\e>
<\a>
Convert the file into a tree
<a>
/ \ \
<b> <c> <e>
/ \
<d> <d>
Note the result don't have to be a binary tree. To functions are given.
1. getToke(): It returns two values Type and Text. Type includes BEGIN, TEXT, END. Text is the actual content of the token.
2. hasNext(): It will return true until the scan hit the end of the file.

Solution:
You have to first decide how to store the tree structure. Then another problem is how to differentiate between parent and children. We use a stack to store the current parent.

One thing to be careful about is that the xml file may be not in the correct format, i.e. <a>text<\c>. Return NULL in this case.

class Node {
public:
    string name;
    string content;
    vector<Node *> children;
    Node (string n, string c = "");
};

struct Token {
    Type type;
    String content;
};

Node * buildTree (){
    Token t = getToken();
    Node * root = new Node (root.content);
    stack<Node *> s;
    s.push (root);
    
    while (hasNext()) {
        t = getToken();
        if (t.type == BEGIN) {
            Node * tmp = new Node (t.content);
            s.top->children.push_back (tmp);
            s.push (tmp);
            continue;
        }
        if (t.type == TEXT) {
            tmp->content += t.content;
            continue;
        }
        if (t.type == END) {
            //Here you need to check whether the format of xml file is correct
            //If the type of token doesn't match, return NULL
            if (t.content == s.top())
                s.pop();
            else
                return NULL;
        }
    }
    //if the stack is not empty at the end, xml is not correct formatted, return NULL
    if (!s.empty())
        return NULL;
    else
        return root;
}

Tuesday, June 25, 2013

private vs protected vs publis

I'm always confused by the member type in class of c++. There are three types: private, protected and public. In inheritance, there are also there specifiers, private, protected and public. After doing some research on Internet, found two good tutorials for this.

http://www.parashift.com/c++-faq-lite/access-rules.html

http://www.learncpp.com/cpp-tutorial/115-inheritance-and-access-specifiers/

Have fun!

Tuesday, May 7, 2013

Notes for blackjack

To prepare the interviews, I have to delve into 'Crack the Code Interview'. To better understand the OO chapter, I decided to work on a 400-line project, blackjack. I found some good source code online and I'll take notes here.

1. enum
http://www.enel.ucalgary.ca/People/Norman/enel315_winter1997/enum_types/

2. C++ operator overload
http://courses.cms.caltech.edu/cs11/material/cpp/donnie/cpp-ops.html

3. Polymorphism: Virtual member function, abstract function
http://www.cplusplus.com/doc/tutorial/polymorphism/

4. protected specifier
http://msdn.microsoft.com/en-us/library/e761de5s(v=vs.71).aspx

5. Can we pass a derived class if the function is expecting the base class as argument?

If you have a function void foo(base b) and pass my_derived to it, that derived is converted to base, loosing any information that was not in base. You can't recover that information in any way.

If you have void foo(base* b) and pass &my_derived to it, no conversion occurs. foo only thinks that it is working with a base. You still can't use members of derived though. foo must be written in such way that it works with base itself or its other children that don't have that member. It is possible, however, to check that b is in fact pointing to a derived object and cast it to derived* using dynamic_cast or some virtual function.

6. Slicing

If You have a base class A and a derived class B, then You can do the following.

void wantAnA(A myA)
{
   // work with myA
}

B derived;
// work with the object "derived"
wantAnA(derived);

Now the method wantAnA needs a copy of derived. However, the object derivedcannot be copied completely, as the class B could invent additional member variables which are not in its base class A.

Therefore, to call wantAnA, the compiler will "slice off" all additional members of the derived class. The result might be an object you did not want to create, because

it may be incomplete,
it behaves like an A-object (all special behaviour of the class B is lost).

7. const issues

When doing the coding, I noticed a scenario like this.

void aClass::foo const () {

vector<T>::iterator iter; //This is wrong since it's within a const function

vector<T>::const_iterator iter; //This works fine

}

7.1 The const for foo means that within the function, the value of the class members will not be changed.

class aClass {

private:

int a;

int *b;

};

aClass::foo const {

a = 5; //Wrong

int x = a; //OK

*b = 100; //OK, it's changing the variable pointed to by b, not b itself

b = &x; //Wrong

}

To fully understand this, it's necessary to notice the difference between const pointer and pointer to const variables. Here is a good demo about this.

Back to the iterator, const_iterator acts like a pointer to const variable and iterator acts like a normal pointer. I didn't delve into the reason, just remember for a const function, if you need to use an iterator, const_iterator is always safer than iterator.

Saturday, April 27, 2013

'\r' vs '\n'

I've been confused by the difference between \r and \n for a long time. Now I found a really wonderful answer from Stackoverflow!

In terms of ascii code, it's 3 -- since they're 10 and 13 respectively;-).

But seriously, there are many:

in Unix and all Unix-like systems, \n is the code for end-of-line, \r means nothing special
as a consequence, in C and most languages that somehow copy it (even remotely), \n is the standard escape sequence for end of line (translated to/from OS-specific sequences as needed)
in old Mac systems (pre-OS X), \r was the code for end-of-line instead
in Windows (and many old OSs), the code for end of line is 2 characters, `\r\n', in this order
as a (surprising;-) consequence (harking back to OSs much older than Windows),\r\n is the standard line-termination for text formats on the Internet
for electromechanical teletype-like "terminals", \r commands the carriage to go back leftwards until it hits the leftmost stop (a slow operation), \n commands the roller to roll up one line (a much faster operation) -- that's the reason you always have \r before \n, so that the roller can move while the carriage is still going leftwards!-)
for character-mode terminals (typically emulating even-older printing ones as above), in raw mode, \r and \n act similarly (except both in terms of the cursor, as there is no carriage or roller;-)

In practice, in the modern context of writing to a text file, you should always use \n(the underlying runtime will translate that if you're on a weird OS, e.g., Windows;-). The only reason to use \r is if you're writing to a character terminal (or more likely a "console window" emulating it) and want the next line you write to overwrite the last one you just wrote (sometimes used for goofy "ascii animation" effects of e.g. progress bars) -- this is getting pretty obsolete in a world of GUIs, though;-).

PS: when I coded in Xinu, things work like this.
/***Without '\r'*****************/
for (int i = 0; i < 3; i++)
kprintf ("Hello World\n");
/*******************************/
Then the output is:
Hello World
Hello World
Hello World

/*******With '\r'******************/
for (int i = 0; i < 3; i++)
kprintf ("Hello World\r\n");
/*********************************/

Output is:
Hello World
Hello World
Hello World

So I guess we can understand like this. '\n' moves the cursor to the next line. '\r' moves cursor to the leftmost of the current line.

Wednesday, March 20, 2013

strace in Linux

I ran into a interesting blog about strace.

http://timetobleed.com/hello-world/

time command in Linux

When working on cs252 lab, I was confused by the meaning of the output of time command. By searching the web, something useful was found.

http://www.dirac.org/linux/time/

To summarize,

Real is wall clock time - time from start to finish of the call. This is all elapsed time including time slices used by other processed and time the process spends blocked (for example if it is waiting for I/O to complete).

User is the amount of CPU time spent in user-mode code (outside the kernel) within the process. This is only actual CPU time used in executing the process. Other processes and time the process spends blocked do not count towards this figure.

Sys is the amount of CPU time spent in the kernel within the process. This means executing CPU time spent in system calls within the kernel, as opposed to library code, which is still running in user-space. Like 'user', this is only CUP time used by the process.

The equation is
user + Sys will tell you how much actual CPU time your process used.

Tuesday, March 5, 2013

Tutorial for Lex

A short tutorial for Lex.

http://dinosaur.compilertools.net/lex/

Saturday, March 2, 2013

Tutorial for regular expression

Find a very good tutorial for regular expression!

http://www.regular-expressions.info/quickstart.html

This is a quick start for regular expression. Within this article, there is also a detailed version.

Write script to login a remote machine

As a cs student, I have remotely login many times every day. The name of the remote machine is pretty long and my password is even longer. So I decided to do something.

Firstly, I wrote a scipt like this

#!/bin/bash
ssh username@machinename

Here, username stands for your login name and machinename represents the address of the remote machine.

By running this script, I saved a lot of time to type the long name of the machine. But, another question is I still have to enter the code after running the script. I searched the Internet, keygen seems to be a cool command for me.

http://www.debian-administration.org/articles/152

This website has detailed instructions about how to use keygen. For mac users, we have no ssh-copy-id command. I found an alternative way to copy the public key to the server. The command is

cat ~/cat ~/.ssh/id_rsa.pub | ssh username@machinename "mkdir ~/.ssh; cat >> ~/.ssh/authorized_keys"

install Ctags and Taglist without root

Ctags and Taglist seem to be quite popular plugins for vim editor. Since I'm novice to Linux. It took me several hours to figure out how to configure them for my vim on the department machine. It's kind of tricky because I have no root right on that machine.

I mainly borrowed methods from the following three places,

http://www.thegeekstuff.com/2009/04/ctags-taglist-vi-vim-editor-as-sourece-code-browser/

This one intrigues my curiosity to try out Ctags and Taglist.

http://superuser.com/questions/66367/is-it-possible-to-install-ctags-without-root-privs

The above site introduces how to install Ctags without root.

http://vim-taglist.sourceforge.net/installation.html

The last one is for installing taglist without root.

The main idea of installing Ctags without root is to compile it by yourself and install it in your home directory.

First download Ctags from its webpage and type in the following commands.

$ tar zxf ctags-5.8.tar.gz
$ cd ctags-5.8
$ ./configure --prefix=$HOME
$ make && make install

This will compile and install ctags in your home directory. The resulting binary will be: $HOME/bin/ctags

That's all I did to make Ctags work in my vim. If it doesn't work for you. Just check the second webpage. I omitted several steps which seems not critical to me.

For the Taglist, just follow the instructions in the third page. I only finished the first step - download taglist.zip and uzip it to .vim folder and it works.

The first website contains detailed description about how to use Ctags and taglist in vim. Have fun!

Saturday, February 16, 2013

Fix vertical split of screen command in Mac

Screen is a cool command to split the terminal. In Mac, however, only the horizontal split works. I searched the web for a long time and luckily find a way to fix this problem!

$ cvs -z3 -d:pserver:anonymous@cvs.savannah.gnu.org:/sources/screen co screen
$ curl http://old.evanmeagher.net/files/gnu-screen-vertsplit.patch > gnu-screen-vertsplit.patch
$ cd screen/src
$ patch < ../../gnu-screen-vertsplit.patch
$ ./configure --enable-locale --enable-telnet --enable-colors256 --enable-rxct_osc
$ make
$ sudo make install

Good luck to you all!

Wednesday, January 9, 2013

Quadratic sorting algorithm

Bubble sort:
Time complexity: O(n^2)
Can be used to detect whether an array is sorted.

Selection sort:
Time complexity: O(n^2)

Insertion sort:
Time complexity: O(n^2)
Very efficient on almost sorted array.
In practice, it is more efficient than other simple quadratic sorting algorithms such as selection sort and bubble sort.

Here are the source code for the sorting algorithms above.

void mySort::myBubbleSort(int *Q, int begin, int end){
bool isSorted=0;
int temp;
while(!isSorted){
isSorted=1;
for(int i=0; i<end; i++){
if(Q[i]>Q[i+1]){
temp=Q[i];
Q[i]=Q[i+1];
Q[i+1]=temp;
isSorted=0;
}
}
}
}

myMin mySort::myFindMin(int *Q, int begin, int end){
myMin minValue;
minValue.value=Q[begin];
minValue.pos=begin;
for(int i=begin; i<=end; i++){
if(Q[i]<minValue.value){
minValue.value=Q[i];
minValue.pos=i;
}
}
return minValue;
}
void mySort::mySelectionSort(int *Q, int begin, int end){
myMin minValue;
int temp;
for(int i=0; i<end;i++){
minValue=myFindMin(Q,i,end);
//cout<<"The minimum element is:"<<minValue.value<<" @ "<<minValue.pos<<endl;
temp=Q[i];
Q[i]=Q[minValue.pos];
Q[minValue.pos]=temp;
}
}

void mySort::myInsertionSort(int *Q, int begin, int end){
int temp;
int j;
for(int i=1; i<=end; i++){
temp=Q[i];
for(j=i-1; j>=0;j--){
if(Q[j]>temp)
Q[j+1]=Q[j];
else{
break;
}
}
Q[j+1]=temp;
}
}

Heap and Heap sort

The following is the code of basic heap operations and heap sort.

//Here is the header file myHeap.h

#ifndef MYHEAP_H
#define MYHEAP_H
class myHeap{
int heapSize;
public:
int parent(int i){
return i/2;
}

int left(int i){
return 2*i;
}

int right(int i){
return 2*i+1;
}

void maxHeapify(int *A, int i);
void makeHeap(int *A, int size);
void myHeapSort(int *A, int size);
};

#endif

//Here is the source file myHeap.cpp

#include "myHeap.h"
void myHeap::maxHeapify(int *A, int i){
int l=left(i);
int r=right(i);
int temp=0;
int large;

if(l<=heapSize && A[l-1]>A[i-1]){
large=l;
}
else
large=i;

if(r<=heapSize && A[r-1]>A[large-1]){
large=r;
}

if(large!=i){
temp=A[i-1];
A[i-1]=A[large-1];
A[large-1]=temp;
maxHeapify(A, large);
}

}

void myHeap::makeHeap(int *A, int size){
heapSize=size;
for(int i=heapSize/2; i>0; i--)
maxHeapify(A,i);

}

void myHeap::myHeapSort(int *A, int size){
makeHeap(A,size);
for(int i=heapSize;i>1;i--){
int temp=A[0];
A[0]=A[i-1];
A[i-1]=temp;
heapSize--;
maxHeapify(A,1);
}
}

Things need to pay close attention to when designing the heap code:

1) The index of an array begins from zero. The index of a heap tree begins from one.

2) The use of "large" in maxHeapify is quite beautiful. You need to first compare A[l] with A[i] and assign the larger one to large. Then compare A[r] and A[large].